|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9985553308292401, |
|
"eval_steps": 100, |
|
"global_step": 432, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.023114706732158336, |
|
"grad_norm": 65.15836334228516, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -0.335565984249115, |
|
"logits/rejected": -0.31526079773902893, |
|
"logps/chosen": -269.28985595703125, |
|
"logps/rejected": -267.5926818847656, |
|
"loss": 2.6152, |
|
"nll_loss": 0.7412666082382202, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -26.92898941040039, |
|
"rewards/margins": -0.1697184145450592, |
|
"rewards/rejected": -26.7592716217041, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04622941346431667, |
|
"grad_norm": 55.07333755493164, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -0.3471914827823639, |
|
"logits/rejected": -0.32920125126838684, |
|
"logps/chosen": -260.79205322265625, |
|
"logps/rejected": -267.349853515625, |
|
"loss": 2.5239, |
|
"nll_loss": 0.7186842560768127, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -26.079208374023438, |
|
"rewards/margins": 0.6557787656784058, |
|
"rewards/rejected": -26.734989166259766, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06934412019647501, |
|
"grad_norm": 57.19869613647461, |
|
"learning_rate": 6.818181818181817e-07, |
|
"logits/chosen": -0.34477299451828003, |
|
"logits/rejected": -0.33347639441490173, |
|
"logps/chosen": -247.47900390625, |
|
"logps/rejected": -250.7107391357422, |
|
"loss": 2.3552, |
|
"nll_loss": 0.703576922416687, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -24.74790382385254, |
|
"rewards/margins": 0.3231719732284546, |
|
"rewards/rejected": -25.071073532104492, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09245882692863334, |
|
"grad_norm": 47.48102569580078, |
|
"learning_rate": 9.09090909090909e-07, |
|
"logits/chosen": -0.5700438618659973, |
|
"logits/rejected": -0.556909441947937, |
|
"logps/chosen": -215.1627197265625, |
|
"logps/rejected": -217.0400848388672, |
|
"loss": 2.1715, |
|
"nll_loss": 0.6503027081489563, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -21.51627540588379, |
|
"rewards/margins": 0.18773558735847473, |
|
"rewards/rejected": -21.704008102416992, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11557353366079168, |
|
"grad_norm": 48.25373458862305, |
|
"learning_rate": 9.845360824742267e-07, |
|
"logits/chosen": -0.8266013264656067, |
|
"logits/rejected": -0.8015046119689941, |
|
"logps/chosen": -196.6488800048828, |
|
"logps/rejected": -195.6967010498047, |
|
"loss": 2.1841, |
|
"nll_loss": 0.5290184020996094, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -19.664888381958008, |
|
"rewards/margins": -0.09521917253732681, |
|
"rewards/rejected": -19.56966781616211, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13868824039295002, |
|
"grad_norm": 55.580039978027344, |
|
"learning_rate": 9.587628865979382e-07, |
|
"logits/chosen": -0.6845192313194275, |
|
"logits/rejected": -0.689314067363739, |
|
"logps/chosen": -164.92901611328125, |
|
"logps/rejected": -165.1588592529297, |
|
"loss": 2.0022, |
|
"nll_loss": 0.4657168388366699, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -16.492902755737305, |
|
"rewards/margins": 0.022982392460107803, |
|
"rewards/rejected": -16.515884399414062, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16180294712510834, |
|
"grad_norm": 50.51268768310547, |
|
"learning_rate": 9.329896907216495e-07, |
|
"logits/chosen": -0.5050565004348755, |
|
"logits/rejected": -0.4807310998439789, |
|
"logps/chosen": -155.29498291015625, |
|
"logps/rejected": -157.361328125, |
|
"loss": 1.9881, |
|
"nll_loss": 0.44492220878601074, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -15.529500007629395, |
|
"rewards/margins": 0.20663371682167053, |
|
"rewards/rejected": -15.736132621765137, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1849176538572667, |
|
"grad_norm": 46.45564651489258, |
|
"learning_rate": 9.072164948453608e-07, |
|
"logits/chosen": -0.48326191306114197, |
|
"logits/rejected": -0.457420289516449, |
|
"logps/chosen": -158.08729553222656, |
|
"logps/rejected": -161.24571228027344, |
|
"loss": 1.8567, |
|
"nll_loss": 0.42924928665161133, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -15.80872917175293, |
|
"rewards/margins": 0.31584271788597107, |
|
"rewards/rejected": -16.124568939208984, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.208032360589425, |
|
"grad_norm": 50.26318359375, |
|
"learning_rate": 8.814432989690721e-07, |
|
"logits/chosen": -0.4506359100341797, |
|
"logits/rejected": -0.43782296776771545, |
|
"logps/chosen": -152.2831573486328, |
|
"logps/rejected": -160.30429077148438, |
|
"loss": 1.7674, |
|
"nll_loss": 0.4159914553165436, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.228317260742188, |
|
"rewards/margins": 0.8021124005317688, |
|
"rewards/rejected": -16.03042984008789, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23114706732158335, |
|
"grad_norm": 45.81875991821289, |
|
"learning_rate": 8.556701030927834e-07, |
|
"logits/chosen": -0.40928536653518677, |
|
"logits/rejected": -0.39079341292381287, |
|
"logps/chosen": -153.24673461914062, |
|
"logps/rejected": -156.20919799804688, |
|
"loss": 1.9362, |
|
"nll_loss": 0.4179740846157074, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -15.324671745300293, |
|
"rewards/margins": 0.29624658823013306, |
|
"rewards/rejected": -15.620920181274414, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23114706732158335, |
|
"eval_logits/chosen": -0.4377523362636566, |
|
"eval_logits/rejected": -0.4122772812843323, |
|
"eval_logps/chosen": -149.33935546875, |
|
"eval_logps/rejected": -152.84754943847656, |
|
"eval_loss": 1.7930248975753784, |
|
"eval_nll_loss": 0.40668219327926636, |
|
"eval_rewards/accuracies": 0.5760869383811951, |
|
"eval_rewards/chosen": -14.933935165405273, |
|
"eval_rewards/margins": 0.35081860423088074, |
|
"eval_rewards/rejected": -15.28475284576416, |
|
"eval_runtime": 74.3015, |
|
"eval_samples_per_second": 24.576, |
|
"eval_steps_per_second": 1.548, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2542617740537417, |
|
"grad_norm": 45.55659103393555, |
|
"learning_rate": 8.298969072164948e-07, |
|
"logits/chosen": -0.38547706604003906, |
|
"logits/rejected": -0.3579915165901184, |
|
"logps/chosen": -146.1110382080078, |
|
"logps/rejected": -150.4032745361328, |
|
"loss": 1.7214, |
|
"nll_loss": 0.39803242683410645, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -14.611104011535645, |
|
"rewards/margins": 0.42922306060791016, |
|
"rewards/rejected": -15.040326118469238, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27737648078590005, |
|
"grad_norm": 44.77095031738281, |
|
"learning_rate": 8.041237113402062e-07, |
|
"logits/chosen": -0.376223623752594, |
|
"logits/rejected": -0.3552733063697815, |
|
"logps/chosen": -155.74005126953125, |
|
"logps/rejected": -157.14755249023438, |
|
"loss": 1.753, |
|
"nll_loss": 0.4237498342990875, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -15.574007034301758, |
|
"rewards/margins": 0.14074988663196564, |
|
"rewards/rejected": -15.714755058288574, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.30049118751805837, |
|
"grad_norm": 54.516483306884766, |
|
"learning_rate": 7.783505154639175e-07, |
|
"logits/chosen": -0.39556393027305603, |
|
"logits/rejected": -0.3727474808692932, |
|
"logps/chosen": -152.9895477294922, |
|
"logps/rejected": -161.13479614257812, |
|
"loss": 1.8165, |
|
"nll_loss": 0.42241328954696655, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -15.298955917358398, |
|
"rewards/margins": 0.8145230412483215, |
|
"rewards/rejected": -16.11347770690918, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3236058942502167, |
|
"grad_norm": 58.50905227661133, |
|
"learning_rate": 7.525773195876288e-07, |
|
"logits/chosen": -0.41800642013549805, |
|
"logits/rejected": -0.41197213530540466, |
|
"logps/chosen": -143.42355346679688, |
|
"logps/rejected": -148.9073486328125, |
|
"loss": 1.8037, |
|
"nll_loss": 0.41033467650413513, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -14.342355728149414, |
|
"rewards/margins": 0.5483782291412354, |
|
"rewards/rejected": -14.890734672546387, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34672060098237506, |
|
"grad_norm": 59.64632034301758, |
|
"learning_rate": 7.268041237113402e-07, |
|
"logits/chosen": -0.40256112813949585, |
|
"logits/rejected": -0.3912666440010071, |
|
"logps/chosen": -143.48622131347656, |
|
"logps/rejected": -148.83050537109375, |
|
"loss": 1.8835, |
|
"nll_loss": 0.41666117310523987, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -14.34862232208252, |
|
"rewards/margins": 0.5344293117523193, |
|
"rewards/rejected": -14.883050918579102, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3698353077145334, |
|
"grad_norm": 41.37995529174805, |
|
"learning_rate": 7.010309278350515e-07, |
|
"logits/chosen": -0.3729507327079773, |
|
"logits/rejected": -0.34710609912872314, |
|
"logps/chosen": -155.8257598876953, |
|
"logps/rejected": -159.4755096435547, |
|
"loss": 1.7067, |
|
"nll_loss": 0.41083773970603943, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -15.582575798034668, |
|
"rewards/margins": 0.36497658491134644, |
|
"rewards/rejected": -15.947550773620605, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3929500144466917, |
|
"grad_norm": 50.4566535949707, |
|
"learning_rate": 6.752577319587629e-07, |
|
"logits/chosen": -0.3252796530723572, |
|
"logits/rejected": -0.31979063153266907, |
|
"logps/chosen": -154.66848754882812, |
|
"logps/rejected": -161.5574951171875, |
|
"loss": 1.6017, |
|
"nll_loss": 0.42361512780189514, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -15.46684741973877, |
|
"rewards/margins": 0.6889010071754456, |
|
"rewards/rejected": -16.15574836730957, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41606472117885, |
|
"grad_norm": 48.24229431152344, |
|
"learning_rate": 6.494845360824742e-07, |
|
"logits/chosen": -0.3405265212059021, |
|
"logits/rejected": -0.33944639563560486, |
|
"logps/chosen": -147.56602478027344, |
|
"logps/rejected": -154.09613037109375, |
|
"loss": 1.6478, |
|
"nll_loss": 0.424372136592865, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -14.756604194641113, |
|
"rewards/margins": 0.653009295463562, |
|
"rewards/rejected": -15.409612655639648, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4391794279110084, |
|
"grad_norm": 50.57717514038086, |
|
"learning_rate": 6.237113402061855e-07, |
|
"logits/chosen": -0.3636409640312195, |
|
"logits/rejected": -0.3508070111274719, |
|
"logps/chosen": -156.1150360107422, |
|
"logps/rejected": -162.10330200195312, |
|
"loss": 1.7155, |
|
"nll_loss": 0.4282284379005432, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -15.611505508422852, |
|
"rewards/margins": 0.5988240838050842, |
|
"rewards/rejected": -16.210330963134766, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4622941346431667, |
|
"grad_norm": 44.41514205932617, |
|
"learning_rate": 5.979381443298969e-07, |
|
"logits/chosen": -0.32660025358200073, |
|
"logits/rejected": -0.3209044337272644, |
|
"logps/chosen": -156.2790985107422, |
|
"logps/rejected": -162.4671173095703, |
|
"loss": 1.7019, |
|
"nll_loss": 0.4315672516822815, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -15.627909660339355, |
|
"rewards/margins": 0.6188000440597534, |
|
"rewards/rejected": -16.2467098236084, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4622941346431667, |
|
"eval_logits/chosen": -0.3579607307910919, |
|
"eval_logits/rejected": -0.3357972204685211, |
|
"eval_logps/chosen": -154.3026885986328, |
|
"eval_logps/rejected": -160.1311492919922, |
|
"eval_loss": 1.678566575050354, |
|
"eval_nll_loss": 0.4193345308303833, |
|
"eval_rewards/accuracies": 0.6086956262588501, |
|
"eval_rewards/chosen": -15.430268287658691, |
|
"eval_rewards/margins": 0.5828461647033691, |
|
"eval_rewards/rejected": -16.01311492919922, |
|
"eval_runtime": 74.1864, |
|
"eval_samples_per_second": 24.614, |
|
"eval_steps_per_second": 1.55, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48540884137532503, |
|
"grad_norm": 51.62085723876953, |
|
"learning_rate": 5.721649484536082e-07, |
|
"logits/chosen": -0.3630141615867615, |
|
"logits/rejected": -0.3378238081932068, |
|
"logps/chosen": -150.49215698242188, |
|
"logps/rejected": -152.28367614746094, |
|
"loss": 1.6739, |
|
"nll_loss": 0.41899624466896057, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -15.049214363098145, |
|
"rewards/margins": 0.17915421724319458, |
|
"rewards/rejected": -15.228368759155273, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5085235481074833, |
|
"grad_norm": 49.88188552856445, |
|
"learning_rate": 5.463917525773195e-07, |
|
"logits/chosen": -0.37590575218200684, |
|
"logits/rejected": -0.3511108160018921, |
|
"logps/chosen": -159.89659118652344, |
|
"logps/rejected": -165.49131774902344, |
|
"loss": 1.7447, |
|
"nll_loss": 0.42955484986305237, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -15.989659309387207, |
|
"rewards/margins": 0.5594727993011475, |
|
"rewards/rejected": -16.549131393432617, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5316382548396418, |
|
"grad_norm": 46.68313217163086, |
|
"learning_rate": 5.20618556701031e-07, |
|
"logits/chosen": -0.37392115592956543, |
|
"logits/rejected": -0.3575811982154846, |
|
"logps/chosen": -162.5522918701172, |
|
"logps/rejected": -168.78067016601562, |
|
"loss": 1.7586, |
|
"nll_loss": 0.4414497911930084, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -16.255229949951172, |
|
"rewards/margins": 0.6228369474411011, |
|
"rewards/rejected": -16.878068923950195, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5547529615718001, |
|
"grad_norm": 54.655609130859375, |
|
"learning_rate": 4.948453608247422e-07, |
|
"logits/chosen": -0.3484077453613281, |
|
"logits/rejected": -0.3337170481681824, |
|
"logps/chosen": -159.63836669921875, |
|
"logps/rejected": -164.4112091064453, |
|
"loss": 1.6017, |
|
"nll_loss": 0.4336668848991394, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -15.963836669921875, |
|
"rewards/margins": 0.47728481888771057, |
|
"rewards/rejected": -16.44112205505371, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5778676683039584, |
|
"grad_norm": 50.76809310913086, |
|
"learning_rate": 4.6907216494845357e-07, |
|
"logits/chosen": -0.30525675415992737, |
|
"logits/rejected": -0.2880803048610687, |
|
"logps/chosen": -156.4806365966797, |
|
"logps/rejected": -159.4465789794922, |
|
"loss": 1.7451, |
|
"nll_loss": 0.4165531098842621, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -15.648063659667969, |
|
"rewards/margins": 0.29659539461135864, |
|
"rewards/rejected": -15.944659233093262, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6009823750361167, |
|
"grad_norm": 51.902610778808594, |
|
"learning_rate": 4.432989690721649e-07, |
|
"logits/chosen": -0.3701649308204651, |
|
"logits/rejected": -0.3554461896419525, |
|
"logps/chosen": -152.5877685546875, |
|
"logps/rejected": -160.40426635742188, |
|
"loss": 1.6025, |
|
"nll_loss": 0.4253969192504883, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.258776664733887, |
|
"rewards/margins": 0.7816492319107056, |
|
"rewards/rejected": -16.04042625427246, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.624097081768275, |
|
"grad_norm": 44.464599609375, |
|
"learning_rate": 4.175257731958763e-07, |
|
"logits/chosen": -0.3865426182746887, |
|
"logits/rejected": -0.3753945231437683, |
|
"logps/chosen": -153.08734130859375, |
|
"logps/rejected": -159.94705200195312, |
|
"loss": 1.628, |
|
"nll_loss": 0.4174048900604248, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -15.308735847473145, |
|
"rewards/margins": 0.6859728097915649, |
|
"rewards/rejected": -15.994707107543945, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6472117885004334, |
|
"grad_norm": 50.29905700683594, |
|
"learning_rate": 3.917525773195876e-07, |
|
"logits/chosen": -0.35409292578697205, |
|
"logits/rejected": -0.3260190784931183, |
|
"logps/chosen": -154.6301727294922, |
|
"logps/rejected": -163.79635620117188, |
|
"loss": 1.6203, |
|
"nll_loss": 0.4250774383544922, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -15.463017463684082, |
|
"rewards/margins": 0.916618824005127, |
|
"rewards/rejected": -16.379634857177734, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6703264952325917, |
|
"grad_norm": 54.7519416809082, |
|
"learning_rate": 3.659793814432989e-07, |
|
"logits/chosen": -0.42501506209373474, |
|
"logits/rejected": -0.39394429326057434, |
|
"logps/chosen": -159.5155487060547, |
|
"logps/rejected": -164.74307250976562, |
|
"loss": 1.5987, |
|
"nll_loss": 0.4190928339958191, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -15.951556205749512, |
|
"rewards/margins": 0.5227512717247009, |
|
"rewards/rejected": -16.474306106567383, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6934412019647501, |
|
"grad_norm": 44.03036880493164, |
|
"learning_rate": 3.402061855670103e-07, |
|
"logits/chosen": -0.4323659837245941, |
|
"logits/rejected": -0.4210866391658783, |
|
"logps/chosen": -163.0435333251953, |
|
"logps/rejected": -172.29119873046875, |
|
"loss": 1.6388, |
|
"nll_loss": 0.4356729984283447, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -16.304353713989258, |
|
"rewards/margins": 0.9247667193412781, |
|
"rewards/rejected": -17.229122161865234, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6934412019647501, |
|
"eval_logits/chosen": -0.38277825713157654, |
|
"eval_logits/rejected": -0.35816264152526855, |
|
"eval_logps/chosen": -155.46498107910156, |
|
"eval_logps/rejected": -162.12692260742188, |
|
"eval_loss": 1.6232643127441406, |
|
"eval_nll_loss": 0.4229773283004761, |
|
"eval_rewards/accuracies": 0.613043487071991, |
|
"eval_rewards/chosen": -15.546499252319336, |
|
"eval_rewards/margins": 0.6661920547485352, |
|
"eval_rewards/rejected": -16.212690353393555, |
|
"eval_runtime": 74.1312, |
|
"eval_samples_per_second": 24.632, |
|
"eval_steps_per_second": 1.551, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7165559086969084, |
|
"grad_norm": 47.341087341308594, |
|
"learning_rate": 3.1443298969072163e-07, |
|
"logits/chosen": -0.4356638789176941, |
|
"logits/rejected": -0.4280335307121277, |
|
"logps/chosen": -164.1811065673828, |
|
"logps/rejected": -167.7774200439453, |
|
"loss": 1.6949, |
|
"nll_loss": 0.4244704246520996, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -16.41811180114746, |
|
"rewards/margins": 0.3596319258213043, |
|
"rewards/rejected": -16.77774429321289, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7396706154290668, |
|
"grad_norm": 43.78164291381836, |
|
"learning_rate": 2.8865979381443296e-07, |
|
"logits/chosen": -0.4178016781806946, |
|
"logits/rejected": -0.40296635031700134, |
|
"logps/chosen": -152.5771484375, |
|
"logps/rejected": -160.88571166992188, |
|
"loss": 1.6922, |
|
"nll_loss": 0.4172099232673645, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -15.257715225219727, |
|
"rewards/margins": 0.8308565020561218, |
|
"rewards/rejected": -16.088571548461914, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7627853221612251, |
|
"grad_norm": 48.753013610839844, |
|
"learning_rate": 2.6288659793814435e-07, |
|
"logits/chosen": -0.4328450560569763, |
|
"logits/rejected": -0.43247896432876587, |
|
"logps/chosen": -153.868896484375, |
|
"logps/rejected": -160.49305725097656, |
|
"loss": 1.6731, |
|
"nll_loss": 0.4279722571372986, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -15.38688850402832, |
|
"rewards/margins": 0.6624161601066589, |
|
"rewards/rejected": -16.049304962158203, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7859000288933834, |
|
"grad_norm": 48.8376350402832, |
|
"learning_rate": 2.3711340206185566e-07, |
|
"logits/chosen": -0.4575740694999695, |
|
"logits/rejected": -0.44574373960494995, |
|
"logps/chosen": -157.2711944580078, |
|
"logps/rejected": -161.98927307128906, |
|
"loss": 1.5679, |
|
"nll_loss": 0.4292600154876709, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -15.727119445800781, |
|
"rewards/margins": 0.4718071520328522, |
|
"rewards/rejected": -16.198925018310547, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8090147356255417, |
|
"grad_norm": 46.211063385009766, |
|
"learning_rate": 2.11340206185567e-07, |
|
"logits/chosen": -0.44085240364074707, |
|
"logits/rejected": -0.44065386056900024, |
|
"logps/chosen": -157.3097686767578, |
|
"logps/rejected": -166.4695281982422, |
|
"loss": 1.6698, |
|
"nll_loss": 0.4102792739868164, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -15.730977058410645, |
|
"rewards/margins": 0.9159765243530273, |
|
"rewards/rejected": -16.646953582763672, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8321294423577, |
|
"grad_norm": 52.41377639770508, |
|
"learning_rate": 1.8556701030927835e-07, |
|
"logits/chosen": -0.4241538941860199, |
|
"logits/rejected": -0.4094991087913513, |
|
"logps/chosen": -160.124267578125, |
|
"logps/rejected": -165.10821533203125, |
|
"loss": 1.7134, |
|
"nll_loss": 0.42789340019226074, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -16.012426376342773, |
|
"rewards/margins": 0.4983920156955719, |
|
"rewards/rejected": -16.510820388793945, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8552441490898585, |
|
"grad_norm": 48.11139678955078, |
|
"learning_rate": 1.5979381443298966e-07, |
|
"logits/chosen": -0.43041014671325684, |
|
"logits/rejected": -0.4028114676475525, |
|
"logps/chosen": -154.57138061523438, |
|
"logps/rejected": -164.22232055664062, |
|
"loss": 1.5667, |
|
"nll_loss": 0.4179977774620056, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -15.45713996887207, |
|
"rewards/margins": 0.9650918841362, |
|
"rewards/rejected": -16.422229766845703, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8783588558220168, |
|
"grad_norm": 47.23114776611328, |
|
"learning_rate": 1.3402061855670102e-07, |
|
"logits/chosen": -0.427821546792984, |
|
"logits/rejected": -0.4097885191440582, |
|
"logps/chosen": -154.52496337890625, |
|
"logps/rejected": -161.27987670898438, |
|
"loss": 1.5921, |
|
"nll_loss": 0.4322156012058258, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -15.452497482299805, |
|
"rewards/margins": 0.6754907369613647, |
|
"rewards/rejected": -16.127986907958984, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9014735625541751, |
|
"grad_norm": 55.62732696533203, |
|
"learning_rate": 1.0824742268041237e-07, |
|
"logits/chosen": -0.461261123418808, |
|
"logits/rejected": -0.44340047240257263, |
|
"logps/chosen": -157.7149658203125, |
|
"logps/rejected": -168.34735107421875, |
|
"loss": 1.6161, |
|
"nll_loss": 0.42217200994491577, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -15.771496772766113, |
|
"rewards/margins": 1.0632401704788208, |
|
"rewards/rejected": -16.834735870361328, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9245882692863334, |
|
"grad_norm": 52.596492767333984, |
|
"learning_rate": 8.24742268041237e-08, |
|
"logits/chosen": -0.43360406160354614, |
|
"logits/rejected": -0.41087478399276733, |
|
"logps/chosen": -162.21621704101562, |
|
"logps/rejected": -167.1909637451172, |
|
"loss": 1.632, |
|
"nll_loss": 0.4444475769996643, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -16.221622467041016, |
|
"rewards/margins": 0.4974748194217682, |
|
"rewards/rejected": -16.719097137451172, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9245882692863334, |
|
"eval_logits/chosen": -0.40716680884361267, |
|
"eval_logits/rejected": -0.3811309337615967, |
|
"eval_logps/chosen": -156.50477600097656, |
|
"eval_logps/rejected": -163.44790649414062, |
|
"eval_loss": 1.6007416248321533, |
|
"eval_nll_loss": 0.42774829268455505, |
|
"eval_rewards/accuracies": 0.636956512928009, |
|
"eval_rewards/chosen": -15.65047550201416, |
|
"eval_rewards/margins": 0.6943140625953674, |
|
"eval_rewards/rejected": -16.344789505004883, |
|
"eval_runtime": 74.2865, |
|
"eval_samples_per_second": 24.581, |
|
"eval_steps_per_second": 1.548, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9477029760184917, |
|
"grad_norm": 50.8940315246582, |
|
"learning_rate": 5.670103092783505e-08, |
|
"logits/chosen": -0.36925220489501953, |
|
"logits/rejected": -0.35820272564888, |
|
"logps/chosen": -148.66673278808594, |
|
"logps/rejected": -157.42532348632812, |
|
"loss": 1.566, |
|
"nll_loss": 0.42418622970581055, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -14.866673469543457, |
|
"rewards/margins": 0.8758570551872253, |
|
"rewards/rejected": -15.742530822753906, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9708176827506501, |
|
"grad_norm": 44.86955642700195, |
|
"learning_rate": 3.092783505154639e-08, |
|
"logits/chosen": -0.40748652815818787, |
|
"logits/rejected": -0.383215069770813, |
|
"logps/chosen": -150.21824645996094, |
|
"logps/rejected": -155.44349670410156, |
|
"loss": 1.5783, |
|
"nll_loss": 0.4278343617916107, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -15.021825790405273, |
|
"rewards/margins": 0.5225244760513306, |
|
"rewards/rejected": -15.544349670410156, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9939323894828085, |
|
"grad_norm": 48.80271911621094, |
|
"learning_rate": 5.154639175257731e-09, |
|
"logits/chosen": -0.41907650232315063, |
|
"logits/rejected": -0.4291330873966217, |
|
"logps/chosen": -157.33888244628906, |
|
"logps/rejected": -164.2548370361328, |
|
"loss": 1.655, |
|
"nll_loss": 0.4265294075012207, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.733888626098633, |
|
"rewards/margins": 0.6915954351425171, |
|
"rewards/rejected": -16.425485610961914, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9985553308292401, |
|
"step": 432, |
|
"total_flos": 0.0, |
|
"train_loss": 1.77929983039697, |
|
"train_runtime": 9807.604, |
|
"train_samples_per_second": 5.646, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 432, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|