{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9936102236421727, "eval_steps": 10000, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06389776357827476, "grad_norm": 0.4242139016276009, "learning_rate": 3.125e-05, "log_odds_chosen": 0.05670202523469925, "log_odds_ratio": -0.6963189840316772, "logits/chosen": 33.65892791748047, "logits/rejected": 33.56386947631836, "logps/chosen": -0.9726333618164062, "logps/rejected": -1.0167349576950073, "loss": 1.6695, "nll_loss": 1.5828170776367188, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.09726335108280182, "rewards/margins": 0.004410145338624716, "rewards/rejected": -0.10167349874973297, "step": 10 }, { "epoch": 0.12779552715654952, "grad_norm": 0.527757575829653, "learning_rate": 6.25e-05, "log_odds_chosen": 0.060249220579862595, "log_odds_ratio": -0.6938394904136658, "logits/chosen": 34.7052001953125, "logits/rejected": 34.30677032470703, "logps/chosen": -0.8907906413078308, "logps/rejected": -0.9303584098815918, "loss": 1.4944, "nll_loss": 1.4315097332000732, "rewards/accuracies": 0.546875, "rewards/chosen": -0.08907906711101532, "rewards/margins": 0.003956770058721304, "rewards/rejected": -0.09303583949804306, "step": 20 }, { "epoch": 0.19169329073482427, "grad_norm": 0.4014606132018809, "learning_rate": 9.375e-05, "log_odds_chosen": 0.04292842000722885, "log_odds_ratio": -0.698261022567749, "logits/chosen": 30.987014770507812, "logits/rejected": 30.439056396484375, "logps/chosen": -0.6581910252571106, "logps/rejected": -0.6830389499664307, "loss": 1.0721, "nll_loss": 1.006190538406372, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.06581910699605942, "rewards/margins": 0.002484795870259404, "rewards/rejected": -0.0683038979768753, "step": 30 }, { "epoch": 0.25559105431309903, "grad_norm": 0.09410150706534143, "learning_rate": 9.979871469976196e-05, "log_odds_chosen": 0.07667034864425659, "log_odds_ratio": -0.6961523294448853, "logits/chosen": 28.151453018188477, "logits/rejected": 27.650014877319336, "logps/chosen": -0.43666666746139526, "logps/rejected": -0.46118512749671936, "loss": 0.8277, "nll_loss": 0.7572250366210938, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.043666668236255646, "rewards/margins": 0.0024518456775695086, "rewards/rejected": -0.046118512749671936, "step": 40 }, { "epoch": 0.3194888178913738, "grad_norm": 0.09768125665941661, "learning_rate": 9.898376992116179e-05, "log_odds_chosen": 0.06605090945959091, "log_odds_ratio": -0.7164387106895447, "logits/chosen": 29.047494888305664, "logits/rejected": 27.90814781188965, "logps/chosen": -0.37932097911834717, "logps/rejected": -0.39148497581481934, "loss": 0.7778, "nll_loss": 0.7044206261634827, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.03793209791183472, "rewards/margins": 0.0012163992505520582, "rewards/rejected": -0.03914849832653999, "step": 50 }, { "epoch": 0.38338658146964855, "grad_norm": 0.07343907441931204, "learning_rate": 9.755282581475769e-05, "log_odds_chosen": 0.08987905830144882, "log_odds_ratio": -0.7063173055648804, "logits/chosen": 28.47494125366211, "logits/rejected": 28.794668197631836, "logps/chosen": -0.3645591139793396, "logps/rejected": -0.38652682304382324, "loss": 0.7576, "nll_loss": 0.6854395866394043, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.0364559181034565, "rewards/margins": 0.0021967687644064426, "rewards/rejected": -0.038652680814266205, "step": 60 }, { "epoch": 0.4472843450479233, "grad_norm": 0.06951733417216101, "learning_rate": 9.552387733294081e-05, "log_odds_chosen": 0.1109728068113327, "log_odds_ratio": -0.7004402875900269, "logits/chosen": 29.863407135009766, "logits/rejected": 30.27783203125, "logps/chosen": -0.35166820883750916, "logps/rejected": -0.3710673451423645, "loss": 0.7621, "nll_loss": 0.6829724907875061, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.035166822373867035, "rewards/margins": 0.0019399106968194246, "rewards/rejected": -0.03710673004388809, "step": 70 }, { "epoch": 0.5111821086261981, "grad_norm": 0.06675882825106033, "learning_rate": 9.292243968009331e-05, "log_odds_chosen": 0.22688157856464386, "log_odds_ratio": -0.6433924436569214, "logits/chosen": 31.850915908813477, "logits/rejected": 32.051448822021484, "logps/chosen": -0.35137858986854553, "logps/rejected": -0.4119800925254822, "loss": 0.7548, "nll_loss": 0.6953305602073669, "rewards/accuracies": 0.640625, "rewards/chosen": -0.03513786196708679, "rewards/margins": 0.006060150917619467, "rewards/rejected": -0.0411980114877224, "step": 80 }, { "epoch": 0.5750798722044729, "grad_norm": 0.07132675829804414, "learning_rate": 8.978122744408906e-05, "log_odds_chosen": 0.20013344287872314, "log_odds_ratio": -0.6580983400344849, "logits/chosen": 32.63883972167969, "logits/rejected": 32.50263595581055, "logps/chosen": -0.35542625188827515, "logps/rejected": -0.41074585914611816, "loss": 0.7549, "nll_loss": 0.6860114932060242, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.03554262965917587, "rewards/margins": 0.0055319564417004585, "rewards/rejected": -0.04107458516955376, "step": 90 }, { "epoch": 0.6389776357827476, "grad_norm": 0.09149445807314589, "learning_rate": 8.613974319136958e-05, "log_odds_chosen": 0.2682611346244812, "log_odds_ratio": -0.6336508989334106, "logits/chosen": 32.11505889892578, "logits/rejected": 32.7309455871582, "logps/chosen": -0.35972970724105835, "logps/rejected": -0.4426427483558655, "loss": 0.753, "nll_loss": 0.6944054365158081, "rewards/accuracies": 0.653124988079071, "rewards/chosen": -0.035972971469163895, "rewards/margins": 0.008291301317512989, "rewards/rejected": -0.04426427185535431, "step": 100 }, { "epoch": 0.7028753993610224, "grad_norm": 0.08926217777865493, "learning_rate": 8.20437806992512e-05, "log_odds_chosen": 0.2868707776069641, "log_odds_ratio": -0.6233163475990295, "logits/chosen": 35.566490173339844, "logits/rejected": 34.921634674072266, "logps/chosen": -0.3582688868045807, "logps/rejected": -0.4581901431083679, "loss": 0.744, "nll_loss": 0.6832225918769836, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.03582689166069031, "rewards/margins": 0.00999213196337223, "rewards/rejected": -0.04581902176141739, "step": 110 }, { "epoch": 0.7667731629392971, "grad_norm": 0.14837704688840048, "learning_rate": 7.754484907260513e-05, "log_odds_chosen": 0.30330824851989746, "log_odds_ratio": -0.6247963905334473, "logits/chosen": 34.01176834106445, "logits/rejected": 34.39574432373047, "logps/chosen": -0.35339340567588806, "logps/rejected": -0.457806259393692, "loss": 0.7419, "nll_loss": 0.6851586103439331, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.035339340567588806, "rewards/margins": 0.01044128555804491, "rewards/rejected": -0.045780621469020844, "step": 120 }, { "epoch": 0.8306709265175719, "grad_norm": 0.10699968939091851, "learning_rate": 7.269952498697734e-05, "log_odds_chosen": 0.3560214638710022, "log_odds_ratio": -0.6051545143127441, "logits/chosen": 36.380924224853516, "logits/rejected": 35.524024963378906, "logps/chosen": -0.355410635471344, "logps/rejected": -0.47948652505874634, "loss": 0.7296, "nll_loss": 0.6667423844337463, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.03554106876254082, "rewards/margins": 0.01240758690983057, "rewards/rejected": -0.04794865846633911, "step": 130 }, { "epoch": 0.8945686900958466, "grad_norm": 0.1720595584929413, "learning_rate": 6.756874120406714e-05, "log_odds_chosen": 0.5141419172286987, "log_odds_ratio": -0.5401668548583984, "logits/chosen": 34.641624450683594, "logits/rejected": 34.34906768798828, "logps/chosen": -0.347829669713974, "logps/rejected": -0.5514229536056519, "loss": 0.7143, "nll_loss": 0.6645579934120178, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.03478296846151352, "rewards/margins": 0.020359333604574203, "rewards/rejected": -0.05514230206608772, "step": 140 }, { "epoch": 0.9584664536741214, "grad_norm": 0.12630063541107736, "learning_rate": 6.22170203068947e-05, "log_odds_chosen": 0.5517296195030212, "log_odds_ratio": -0.543639600276947, "logits/chosen": 36.98326873779297, "logits/rejected": 36.52146530151367, "logps/chosen": -0.3562212884426117, "logps/rejected": -0.578203022480011, "loss": 0.7162, "nll_loss": 0.6639058589935303, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.03562213480472565, "rewards/margins": 0.02219817042350769, "rewards/rejected": -0.05782030150294304, "step": 150 }, { "epoch": 1.0223642172523961, "grad_norm": 0.1403636887974213, "learning_rate": 5.6711663290882776e-05, "log_odds_chosen": 0.6598159074783325, "log_odds_ratio": -0.5024036169052124, "logits/chosen": 36.992469787597656, "logits/rejected": 35.901676177978516, "logps/chosen": -0.33446019887924194, "logps/rejected": -0.5835962891578674, "loss": 0.7105, "nll_loss": 0.657503604888916, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.033446017652750015, "rewards/margins": 0.0249136071652174, "rewards/rejected": -0.05835963040590286, "step": 160 }, { "epoch": 1.0862619808306708, "grad_norm": 0.14799945317167665, "learning_rate": 5.112190321479026e-05, "log_odds_chosen": 0.6551031470298767, "log_odds_ratio": -0.5045552849769592, "logits/chosen": 35.59492874145508, "logits/rejected": 35.479576110839844, "logps/chosen": -0.36466851830482483, "logps/rejected": -0.6384136080741882, "loss": 0.7194, "nll_loss": 0.6874456405639648, "rewards/accuracies": 0.765625, "rewards/chosen": -0.03646685555577278, "rewards/margins": 0.027374515309929848, "rewards/rejected": -0.06384135782718658, "step": 170 }, { "epoch": 1.1501597444089458, "grad_norm": 0.4144450852693044, "learning_rate": 4.551803455482833e-05, "log_odds_chosen": 0.8569015264511108, "log_odds_ratio": -0.43813252449035645, "logits/chosen": 36.478363037109375, "logits/rejected": 35.625038146972656, "logps/chosen": -0.3283715844154358, "logps/rejected": -0.6759421229362488, "loss": 0.7011, "nll_loss": 0.6358424425125122, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -0.0328371599316597, "rewards/margins": 0.03475705534219742, "rewards/rejected": -0.06759421527385712, "step": 180 }, { "epoch": 1.2140575079872205, "grad_norm": 0.1393106135709619, "learning_rate": 3.9970529210836366e-05, "log_odds_chosen": 0.7694636583328247, "log_odds_ratio": -0.48203492164611816, "logits/chosen": 37.65850067138672, "logits/rejected": 37.32286071777344, "logps/chosen": -0.34162312746047974, "logps/rejected": -0.6649552583694458, "loss": 0.696, "nll_loss": 0.6590245962142944, "rewards/accuracies": 0.78125, "rewards/chosen": -0.034162312746047974, "rewards/margins": 0.032333213835954666, "rewards/rejected": -0.06649552285671234, "step": 190 }, { "epoch": 1.2779552715654952, "grad_norm": 0.18676893862042468, "learning_rate": 3.4549150281252636e-05, "log_odds_chosen": 0.8638985753059387, "log_odds_ratio": -0.4615907073020935, "logits/chosen": 34.856327056884766, "logits/rejected": 34.688533782958984, "logps/chosen": -0.3576621413230896, "logps/rejected": -0.74553382396698, "loss": 0.6981, "nll_loss": 0.6673237085342407, "rewards/accuracies": 0.78125, "rewards/chosen": -0.03576621413230896, "rewards/margins": 0.03878717124462128, "rewards/rejected": -0.07455337792634964, "step": 200 }, { "epoch": 1.34185303514377, "grad_norm": 0.2564156817901977, "learning_rate": 2.932207475167398e-05, "log_odds_chosen": 0.8325332403182983, "log_odds_ratio": -0.45953792333602905, "logits/chosen": 36.798072814941406, "logits/rejected": 35.09117889404297, "logps/chosen": -0.3448064923286438, "logps/rejected": -0.7093031406402588, "loss": 0.6982, "nll_loss": 0.6432278752326965, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.03448065370321274, "rewards/margins": 0.03644966334104538, "rewards/rejected": -0.07093031704425812, "step": 210 }, { "epoch": 1.4057507987220448, "grad_norm": 0.1531688356816753, "learning_rate": 2.43550361297047e-05, "log_odds_chosen": 1.0281397104263306, "log_odds_ratio": -0.4170387387275696, "logits/chosen": 35.599212646484375, "logits/rejected": 34.3337287902832, "logps/chosen": -0.3171806335449219, "logps/rejected": -0.7794741988182068, "loss": 0.6881, "nll_loss": 0.6331400275230408, "rewards/accuracies": 0.84375, "rewards/chosen": -0.03171805664896965, "rewards/margins": 0.04622935503721237, "rewards/rejected": -0.07794742286205292, "step": 220 }, { "epoch": 1.4696485623003195, "grad_norm": 0.13237811516695533, "learning_rate": 1.971049780795901e-05, "log_odds_chosen": 0.9918270111083984, "log_odds_ratio": -0.440161794424057, "logits/chosen": 34.80529022216797, "logits/rejected": 34.433349609375, "logps/chosen": -0.3528688848018646, "logps/rejected": -0.8333184123039246, "loss": 0.684, "nll_loss": 0.653908908367157, "rewards/accuracies": 0.8125, "rewards/chosen": -0.03528688848018646, "rewards/margins": 0.04804495349526405, "rewards/rejected": -0.08333183825016022, "step": 230 }, { "epoch": 1.5335463258785942, "grad_norm": 0.17618952870677607, "learning_rate": 1.544686755065677e-05, "log_odds_chosen": 1.110446810722351, "log_odds_ratio": -0.39400768280029297, "logits/chosen": 35.90544891357422, "logits/rejected": 34.70677185058594, "logps/chosen": -0.3103191554546356, "logps/rejected": -0.8037627935409546, "loss": 0.6899, "nll_loss": 0.6227988004684448, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.03103191778063774, "rewards/margins": 0.04934436455368996, "rewards/rejected": -0.0803762823343277, "step": 240 }, { "epoch": 1.5974440894568689, "grad_norm": 0.17643350651397863, "learning_rate": 1.1617762982099446e-05, "log_odds_chosen": 1.0278904438018799, "log_odds_ratio": -0.40579158067703247, "logits/chosen": 36.86786651611328, "logits/rejected": 35.534324645996094, "logps/chosen": -0.34132060408592224, "logps/rejected": -0.8093164563179016, "loss": 0.6839, "nll_loss": 0.6438942551612854, "rewards/accuracies": 0.8218749761581421, "rewards/chosen": -0.034132059663534164, "rewards/margins": 0.04679957777261734, "rewards/rejected": -0.0809316486120224, "step": 250 }, { "epoch": 1.6613418530351438, "grad_norm": 0.20335250536853633, "learning_rate": 8.271337313934869e-06, "log_odds_chosen": 1.0964257717132568, "log_odds_ratio": -0.40502405166625977, "logits/chosen": 35.01730728149414, "logits/rejected": 33.87272262573242, "logps/chosen": -0.35952043533325195, "logps/rejected": -0.8931809663772583, "loss": 0.6875, "nll_loss": 0.6542503833770752, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.035952042788267136, "rewards/margins": 0.0533660463988781, "rewards/rejected": -0.08931808918714523, "step": 260 }, { "epoch": 1.7252396166134185, "grad_norm": 0.1417159132341433, "learning_rate": 5.449673790581611e-06, "log_odds_chosen": 1.1632859706878662, "log_odds_ratio": -0.39327362179756165, "logits/chosen": 37.34235763549805, "logits/rejected": 35.14636993408203, "logps/chosen": -0.3363361358642578, "logps/rejected": -0.8959217071533203, "loss": 0.6837, "nll_loss": 0.6559703350067139, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.03363361582159996, "rewards/margins": 0.055958546698093414, "rewards/rejected": -0.08959217369556427, "step": 270 }, { "epoch": 1.7891373801916934, "grad_norm": 0.39828151397493416, "learning_rate": 3.18825646801314e-06, "log_odds_chosen": 1.0620207786560059, "log_odds_ratio": -0.41589412093162537, "logits/chosen": 35.572288513183594, "logits/rejected": 34.087669372558594, "logps/chosen": -0.3491634726524353, "logps/rejected": -0.8419939875602722, "loss": 0.6856, "nll_loss": 0.6521117091178894, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03491634875535965, "rewards/margins": 0.04928305745124817, "rewards/rejected": -0.08419940620660782, "step": 280 }, { "epoch": 1.8530351437699681, "grad_norm": 0.17410175622655502, "learning_rate": 1.5155239811656563e-06, "log_odds_chosen": 1.1792383193969727, "log_odds_ratio": -0.3946213126182556, "logits/chosen": 36.629905700683594, "logits/rejected": 35.32917404174805, "logps/chosen": -0.33119866251945496, "logps/rejected": -0.899772047996521, "loss": 0.6837, "nll_loss": 0.6425603628158569, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.033119868487119675, "rewards/margins": 0.05685734748840332, "rewards/rejected": -0.0899772122502327, "step": 290 }, { "epoch": 1.9169329073482428, "grad_norm": 0.13374493386406422, "learning_rate": 4.52511911603265e-07, "log_odds_chosen": 1.0675084590911865, "log_odds_ratio": -0.42029696702957153, "logits/chosen": 36.18767547607422, "logits/rejected": 33.20973205566406, "logps/chosen": -0.34383276104927063, "logps/rejected": -0.8607551455497742, "loss": 0.6844, "nll_loss": 0.6432604193687439, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -0.034383274614810944, "rewards/margins": 0.05169224739074707, "rewards/rejected": -0.08607552200555801, "step": 300 }, { "epoch": 1.9808306709265175, "grad_norm": 0.1377772193573712, "learning_rate": 1.2588252874673468e-08, "log_odds_chosen": 1.0935267210006714, "log_odds_ratio": -0.41092976927757263, "logits/chosen": 36.207176208496094, "logits/rejected": 34.82603073120117, "logps/chosen": -0.344975084066391, "logps/rejected": -0.8865535855293274, "loss": 0.6781, "nll_loss": 0.6345787644386292, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.03449751064181328, "rewards/margins": 0.05415785312652588, "rewards/rejected": -0.08865536749362946, "step": 310 }, { "epoch": 1.9936102236421727, "step": 312, "total_flos": 0.0, "train_loss": 0.7846963420892373, "train_runtime": 5577.3844, "train_samples_per_second": 7.172, "train_steps_per_second": 0.056 } ], "logging_steps": 10, "max_steps": 312, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }