{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9936102236421727, "eval_steps": 10000, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06389776357827476, "grad_norm": 0.341777060295131, "learning_rate": 9.375e-07, "log_odds_chosen": 0.05486620217561722, "log_odds_ratio": -0.6975381374359131, "logits/chosen": 33.646114349365234, "logits/rejected": 33.55038833618164, "logps/chosen": -0.9772504568099976, "logps/rejected": -1.0204073190689087, "loss": 1.6735, "nll_loss": 1.5879063606262207, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.09772505611181259, "rewards/margins": 0.004315672442317009, "rewards/rejected": -0.10204073041677475, "step": 10 }, { "epoch": 0.12779552715654952, "grad_norm": 0.357474094120966, "learning_rate": 1.875e-06, "log_odds_chosen": 0.054913729429244995, "log_odds_ratio": -0.6985839009284973, "logits/chosen": 34.386383056640625, "logits/rejected": 33.98283386230469, "logps/chosen": -0.9762754440307617, "logps/rejected": -1.0154238939285278, "loss": 1.668, "nll_loss": 1.6016145944595337, "rewards/accuracies": 0.515625, "rewards/chosen": -0.09762755781412125, "rewards/margins": 0.003914830274879932, "rewards/rejected": -0.10154237598180771, "step": 20 }, { "epoch": 0.19169329073482427, "grad_norm": 0.3765211315354108, "learning_rate": 2.8125e-06, "log_odds_chosen": 0.06909728795289993, "log_odds_ratio": -0.6911064386367798, "logits/chosen": 33.32851028442383, "logits/rejected": 32.749427795410156, "logps/chosen": -0.9813788533210754, "logps/rejected": -1.0349732637405396, "loss": 1.6757, "nll_loss": 1.6085166931152344, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.09813789278268814, "rewards/margins": 0.005359448026865721, "rewards/rejected": -0.10349734127521515, "step": 30 }, { "epoch": 0.25559105431309903, "grad_norm": 0.39833625263223615, "learning_rate": 2.993961440992859e-06, "log_odds_chosen": 0.11841567605733871, "log_odds_ratio": -0.6757725477218628, "logits/chosen": 33.839942932128906, "logits/rejected": 33.162986755371094, "logps/chosen": -0.9702426791191101, "logps/rejected": -1.0581519603729248, "loss": 1.664, "nll_loss": 1.6022984981536865, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.09702426195144653, "rewards/margins": 0.008790932595729828, "rewards/rejected": -0.10581519454717636, "step": 40 }, { "epoch": 0.3194888178913738, "grad_norm": 0.45952260963075464, "learning_rate": 2.9695130976348534e-06, "log_odds_chosen": 0.09267185628414154, "log_odds_ratio": -0.684535801410675, "logits/chosen": 34.052207946777344, "logits/rejected": 32.802547454833984, "logps/chosen": -0.9801093935966492, "logps/rejected": -1.0446739196777344, "loss": 1.6582, "nll_loss": 1.5952296257019043, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.09801094233989716, "rewards/margins": 0.0064564356580376625, "rewards/rejected": -0.10446737706661224, "step": 50 }, { "epoch": 0.38338658146964855, "grad_norm": 0.49504068932785444, "learning_rate": 2.9265847744427307e-06, "log_odds_chosen": 0.06481704860925674, "log_odds_ratio": -0.698478102684021, "logits/chosen": 32.32544708251953, "logits/rejected": 32.51976776123047, "logps/chosen": -0.9770153164863586, "logps/rejected": -1.0272716283798218, "loss": 1.6387, "nll_loss": 1.572589635848999, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.0977015346288681, "rewards/margins": 0.0050256275571882725, "rewards/rejected": -0.10272715240716934, "step": 60 }, { "epoch": 0.4472843450479233, "grad_norm": 0.5262433549192981, "learning_rate": 2.865716319988224e-06, "log_odds_chosen": 0.030134931206703186, "log_odds_ratio": -0.7101849317550659, "logits/chosen": 33.02894973754883, "logits/rejected": 33.309715270996094, "logps/chosen": -0.9551456570625305, "logps/rejected": -0.9776851534843445, "loss": 1.6289, "nll_loss": 1.5476250648498535, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.09551456570625305, "rewards/margins": 0.0022539461497217417, "rewards/rejected": -0.09776850789785385, "step": 70 }, { "epoch": 0.5111821086261981, "grad_norm": 0.5188886669640671, "learning_rate": 2.7876731904027993e-06, "log_odds_chosen": 0.08591620624065399, "log_odds_ratio": -0.6787055730819702, "logits/chosen": 34.61581039428711, "logits/rejected": 34.63213348388672, "logps/chosen": -0.9702480435371399, "logps/rejected": -1.0235538482666016, "loss": 1.5921, "nll_loss": 1.5600519180297852, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0970248132944107, "rewards/margins": 0.005330582614988089, "rewards/rejected": -0.10235539823770523, "step": 80 }, { "epoch": 0.5750798722044729, "grad_norm": 0.5939864943475555, "learning_rate": 2.6934368233226715e-06, "log_odds_chosen": 0.0763852447271347, "log_odds_ratio": -0.6891772150993347, "logits/chosen": 34.269248962402344, "logits/rejected": 34.00069046020508, "logps/chosen": -0.9529930949211121, "logps/rejected": -1.0018848180770874, "loss": 1.586, "nll_loss": 1.5087509155273438, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.09529931843280792, "rewards/margins": 0.004889167379587889, "rewards/rejected": -0.1001884788274765, "step": 90 }, { "epoch": 0.6389776357827476, "grad_norm": 0.6337044866684244, "learning_rate": 2.584192295741087e-06, "log_odds_chosen": 0.057517312467098236, "log_odds_ratio": -0.7008931040763855, "logits/chosen": 32.44559860229492, "logits/rejected": 32.7852783203125, "logps/chosen": -0.9386932253837585, "logps/rejected": -0.9813023805618286, "loss": 1.5499, "nll_loss": 1.4884004592895508, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.09386932104825974, "rewards/margins": 0.004260920453816652, "rewards/rejected": -0.0981302410364151, "step": 100 }, { "epoch": 0.7028753993610224, "grad_norm": 0.658610355052368, "learning_rate": 2.461313420977536e-06, "log_odds_chosen": 0.06078052520751953, "log_odds_ratio": -0.694057822227478, "logits/chosen": 34.87937927246094, "logits/rejected": 34.10810470581055, "logps/chosen": -0.9328826665878296, "logps/rejected": -0.9768407940864563, "loss": 1.5083, "nll_loss": 1.4501432180404663, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.09328827261924744, "rewards/margins": 0.004395808558911085, "rewards/rejected": -0.09768407791852951, "step": 110 }, { "epoch": 0.7667731629392971, "grad_norm": 0.7021451570051785, "learning_rate": 2.3263454721781537e-06, "log_odds_chosen": 0.012948527932167053, "log_odds_ratio": -0.7222181558609009, "logits/chosen": 32.433719635009766, "logits/rejected": 32.53590774536133, "logps/chosen": -0.9092488288879395, "logps/rejected": -0.9225506782531738, "loss": 1.4673, "nll_loss": 1.4067564010620117, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.09092487394809723, "rewards/margins": 0.0013302009319886565, "rewards/rejected": -0.09225507825613022, "step": 120 }, { "epoch": 0.8306709265175719, "grad_norm": 0.6227085414802311, "learning_rate": 2.18098574960932e-06, "log_odds_chosen": 0.08671535551548004, "log_odds_ratio": -0.6804493069648743, "logits/chosen": 33.88345718383789, "logits/rejected": 33.001914978027344, "logps/chosen": -0.8342811465263367, "logps/rejected": -0.8885458707809448, "loss": 1.3999, "nll_loss": 1.3158425092697144, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.08342811465263367, "rewards/margins": 0.005426469258964062, "rewards/rejected": -0.0888545885682106, "step": 130 }, { "epoch": 0.8945686900958466, "grad_norm": 0.6264864351444569, "learning_rate": 2.027062236122014e-06, "log_odds_chosen": 0.04714164510369301, "log_odds_ratio": -0.6999929547309875, "logits/chosen": 31.832406997680664, "logits/rejected": 31.350921630859375, "logps/chosen": -0.8387459516525269, "logps/rejected": -0.876442551612854, "loss": 1.3491, "nll_loss": 1.2940082550048828, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.08387459814548492, "rewards/margins": 0.0037696503568440676, "rewards/rejected": -0.08764425665140152, "step": 140 }, { "epoch": 0.9584664536741214, "grad_norm": 0.5655314348380543, "learning_rate": 1.866510609206841e-06, "log_odds_chosen": 0.044148243963718414, "log_odds_ratio": -0.7019084692001343, "logits/chosen": 33.6451416015625, "logits/rejected": 33.04177474975586, "logps/chosen": -0.8360700607299805, "logps/rejected": -0.8655373454093933, "loss": 1.3242, "nll_loss": 1.2609388828277588, "rewards/accuracies": 0.53125, "rewards/chosen": -0.08360700309276581, "rewards/margins": 0.0029467367567121983, "rewards/rejected": -0.08655373752117157, "step": 150 }, { "epoch": 1.0223642172523961, "grad_norm": 0.5571454673635674, "learning_rate": 1.7013498987264833e-06, "log_odds_chosen": 0.06723789870738983, "log_odds_ratio": -0.6888397932052612, "logits/chosen": 33.27342987060547, "logits/rejected": 32.30434036254883, "logps/chosen": -0.7994817495346069, "logps/rejected": -0.8369095921516418, "loss": 1.2899, "nll_loss": 1.211586594581604, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -0.07994817197322845, "rewards/margins": 0.0037427886854857206, "rewards/rejected": -0.08369095623493195, "step": 160 }, { "epoch": 1.0862619808306708, "grad_norm": 0.5333966997862959, "learning_rate": 1.5336570964437077e-06, "log_odds_chosen": 0.038995109498500824, "log_odds_ratio": -0.6969125866889954, "logits/chosen": 32.00867462158203, "logits/rejected": 31.789836883544922, "logps/chosen": -0.8085753321647644, "logps/rejected": -0.8331602215766907, "loss": 1.2863, "nll_loss": 1.2334020137786865, "rewards/accuracies": 0.503125011920929, "rewards/chosen": -0.0808575376868248, "rewards/margins": 0.0024584876373410225, "rewards/rejected": -0.08331602811813354, "step": 170 }, { "epoch": 1.1501597444089458, "grad_norm": 0.5317464882215207, "learning_rate": 1.3655410366448499e-06, "log_odds_chosen": 0.10975570976734161, "log_odds_ratio": -0.6656275987625122, "logits/chosen": 32.76184844970703, "logits/rejected": 32.081581115722656, "logps/chosen": -0.7589792013168335, "logps/rejected": -0.8160526156425476, "loss": 1.2451, "nll_loss": 1.1476625204086304, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0758979320526123, "rewards/margins": 0.0057073310017585754, "rewards/rejected": -0.08160526305437088, "step": 180 }, { "epoch": 1.2140575079872205, "grad_norm": 0.5186911330716555, "learning_rate": 1.199115876325091e-06, "log_odds_chosen": 0.04539128392934799, "log_odds_ratio": -0.6990563869476318, "logits/chosen": 33.35085678100586, "logits/rejected": 33.3908805847168, "logps/chosen": -0.7675826549530029, "logps/rejected": -0.7923057675361633, "loss": 1.2217, "nll_loss": 1.161102294921875, "rewards/accuracies": 0.53125, "rewards/chosen": -0.07675826549530029, "rewards/margins": 0.0024723131209611893, "rewards/rejected": -0.07923058420419693, "step": 190 }, { "epoch": 1.2779552715654952, "grad_norm": 0.5411315465201, "learning_rate": 1.036474508437579e-06, "log_odds_chosen": 0.07649532705545425, "log_odds_ratio": -0.6858269572257996, "logits/chosen": 30.826059341430664, "logits/rejected": 30.824636459350586, "logps/chosen": -0.786870002746582, "logps/rejected": -0.8327391743659973, "loss": 1.215, "nll_loss": 1.165637493133545, "rewards/accuracies": 0.578125, "rewards/chosen": -0.07868699729442596, "rewards/margins": 0.004586914554238319, "rewards/rejected": -0.08327391743659973, "step": 200 }, { "epoch": 1.34185303514377, "grad_norm": 0.5587119516125117, "learning_rate": 8.796622425502193e-07, "log_odds_chosen": 0.04922567307949066, "log_odds_ratio": -0.6928130984306335, "logits/chosen": 32.06999969482422, "logits/rejected": 31.100805282592773, "logps/chosen": -0.760046124458313, "logps/rejected": -0.7879078388214111, "loss": 1.2018, "nll_loss": 1.1223350763320923, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.07600460201501846, "rewards/margins": 0.00278617930598557, "rewards/rejected": -0.07879078388214111, "step": 210 }, { "epoch": 1.4057507987220448, "grad_norm": 0.5532006875711314, "learning_rate": 7.30651083891141e-07, "log_odds_chosen": 0.09409201890230179, "log_odds_ratio": -0.6719040870666504, "logits/chosen": 31.775470733642578, "logits/rejected": 31.210159301757812, "logps/chosen": -0.7408252954483032, "logps/rejected": -0.7944933772087097, "loss": 1.1848, "nll_loss": 1.1082121133804321, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07408253848552704, "rewards/margins": 0.005366811528801918, "rewards/rejected": -0.07944934070110321, "step": 220 }, { "epoch": 1.4696485623003195, "grad_norm": 0.5478964489714558, "learning_rate": 5.913149342387704e-07, "log_odds_chosen": 0.022008871659636497, "log_odds_ratio": -0.7079219818115234, "logits/chosen": 30.458974838256836, "logits/rejected": 30.637874603271484, "logps/chosen": -0.7661860585212708, "logps/rejected": -0.783053994178772, "loss": 1.1753, "nll_loss": 1.1201671361923218, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.07661859691143036, "rewards/margins": 0.00168679840862751, "rewards/rejected": -0.07830540090799332, "step": 230 }, { "epoch": 1.5335463258785942, "grad_norm": 0.5614636317534148, "learning_rate": 4.63406026519703e-07, "log_odds_chosen": 0.12717078626155853, "log_odds_ratio": -0.6581142544746399, "logits/chosen": 31.024799346923828, "logits/rejected": 30.477502822875977, "logps/chosen": -0.7121531367301941, "logps/rejected": -0.7798753380775452, "loss": 1.1701, "nll_loss": 1.073432207107544, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.07121531665325165, "rewards/margins": 0.006772211752831936, "rewards/rejected": -0.07798753678798676, "step": 240 }, { "epoch": 1.5974440894568689, "grad_norm": 0.5532326330060124, "learning_rate": 3.4853288946298335e-07, "log_odds_chosen": 0.031235849484801292, "log_odds_ratio": -0.7052245140075684, "logits/chosen": 31.759140014648438, "logits/rejected": 31.2039737701416, "logps/chosen": -0.7514272332191467, "logps/rejected": -0.7712705135345459, "loss": 1.1628, "nll_loss": 1.0986436605453491, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.07514272630214691, "rewards/margins": 0.0019843343179672956, "rewards/rejected": -0.07712705433368683, "step": 250 }, { "epoch": 1.6613418530351438, "grad_norm": 0.5579681347407102, "learning_rate": 2.48140119418046e-07, "log_odds_chosen": 0.09361070394515991, "log_odds_ratio": -0.6759673953056335, "logits/chosen": 30.127365112304688, "logits/rejected": 29.955150604248047, "logps/chosen": -0.7377598881721497, "logps/rejected": -0.789117157459259, "loss": 1.1595, "nll_loss": 1.0882426500320435, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.0737759917974472, "rewards/margins": 0.0051357327029109, "rewards/rejected": -0.07891170680522919, "step": 260 }, { "epoch": 1.7252396166134185, "grad_norm": 0.5388500509178578, "learning_rate": 1.634902137174483e-07, "log_odds_chosen": 0.06861741840839386, "log_odds_ratio": -0.6897167563438416, "logits/chosen": 31.801860809326172, "logits/rejected": 30.827133178710938, "logps/chosen": -0.7297223210334778, "logps/rejected": -0.7675324082374573, "loss": 1.15, "nll_loss": 1.091435194015503, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.07297223806381226, "rewards/margins": 0.003781010629609227, "rewards/rejected": -0.07675323635339737, "step": 270 }, { "epoch": 1.7891373801916934, "grad_norm": 0.5597110218019701, "learning_rate": 9.564769404039419e-08, "log_odds_chosen": 0.10912013053894043, "log_odds_ratio": -0.6706396341323853, "logits/chosen": 30.64389419555664, "logits/rejected": 30.014019012451172, "logps/chosen": -0.7340375185012817, "logps/rejected": -0.7948423624038696, "loss": 1.1518, "nll_loss": 1.0911478996276855, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.0734037533402443, "rewards/margins": 0.00608047703281045, "rewards/rejected": -0.0794842392206192, "step": 280 }, { "epoch": 1.8530351437699681, "grad_norm": 0.5432963139659418, "learning_rate": 4.546571943496969e-08, "log_odds_chosen": 0.071634940803051, "log_odds_ratio": -0.6880494356155396, "logits/chosen": 31.523670196533203, "logits/rejected": 31.051345825195312, "logps/chosen": -0.7210476398468018, "logps/rejected": -0.7614242434501648, "loss": 1.1457, "nll_loss": 1.078147292137146, "rewards/accuracies": 0.53125, "rewards/chosen": -0.07210476696491241, "rewards/margins": 0.0040376619435846806, "rewards/rejected": -0.07614242285490036, "step": 290 }, { "epoch": 1.9169329073482428, "grad_norm": 0.5550233900085839, "learning_rate": 1.357535734809795e-08, "log_odds_chosen": 0.08968226611614227, "log_odds_ratio": -0.6760362386703491, "logits/chosen": 30.686954498291016, "logits/rejected": 29.28194236755371, "logps/chosen": -0.7248884439468384, "logps/rejected": -0.7753577828407288, "loss": 1.1474, "nll_loss": 1.0696710348129272, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.07248884439468384, "rewards/margins": 0.005046932026743889, "rewards/rejected": -0.07753578573465347, "step": 300 }, { "epoch": 1.9808306709265175, "grad_norm": 0.49758643306678957, "learning_rate": 3.77647586240204e-10, "log_odds_chosen": 0.048125725239515305, "log_odds_ratio": -0.6949858665466309, "logits/chosen": 30.8554744720459, "logits/rejected": 30.65484046936035, "logps/chosen": -0.7389064431190491, "logps/rejected": -0.7689411640167236, "loss": 1.1435, "nll_loss": 1.0751014947891235, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.07389064878225327, "rewards/margins": 0.0030034759547561407, "rewards/rejected": -0.0768941268324852, "step": 310 }, { "epoch": 1.9936102236421727, "step": 312, "total_flos": 0.0, "train_loss": 1.3676127867820935, "train_runtime": 5524.9246, "train_samples_per_second": 7.24, "train_steps_per_second": 0.056 } ], "logging_steps": 10, "max_steps": 312, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }