{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 504, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02976190476190476, "grad_norm": 1643.52392578125, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": -0.34639421105384827, "log_odds_ratio": -1.0579421520233154, "logits/chosen": 125.67509460449219, "logits/rejected": 180.79092407226562, "logps/chosen": -15.30119514465332, "logps/rejected": -14.954809188842773, "loss": 15.2275, "nll_loss": 14.854708671569824, "rewards/accuracies": 0.25, "rewards/chosen": -7.65059757232666, "rewards/margins": -0.1731930673122406, "rewards/rejected": -7.477404594421387, "step": 5 }, { "epoch": 0.05952380952380952, "grad_norm": 859.1522827148438, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": -0.04612647369503975, "log_odds_ratio": -0.8707455396652222, "logits/chosen": 226.11355590820312, "logits/rejected": 262.4227600097656, "logps/chosen": -10.240728378295898, "logps/rejected": -10.194613456726074, "loss": 10.6297, "nll_loss": 10.136636734008789, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -5.120364189147949, "rewards/margins": -0.023057078942656517, "rewards/rejected": -5.097306728363037, "step": 10 }, { "epoch": 0.08928571428571429, "grad_norm": 192.85568237304688, "learning_rate": 1.5e-06, "log_odds_chosen": -0.1222303956747055, "log_odds_ratio": -0.8104267120361328, "logits/chosen": 293.97882080078125, "logits/rejected": 282.1849670410156, "logps/chosen": -5.908555507659912, "logps/rejected": -5.787174224853516, "loss": 6.2671, "nll_loss": 5.8525919914245605, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -2.954277753829956, "rewards/margins": -0.06069115549325943, "rewards/rejected": -2.893587112426758, "step": 15 }, { "epoch": 0.11904761904761904, "grad_norm": 181.04627990722656, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.1651725322008133, "log_odds_ratio": -0.8192933797836304, "logits/chosen": 280.179931640625, "logits/rejected": 263.22296142578125, "logps/chosen": -3.191753625869751, "logps/rejected": -3.352200984954834, "loss": 3.8066, "nll_loss": 3.3215103149414062, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5958768129348755, "rewards/margins": 0.08022388815879822, "rewards/rejected": -1.676100492477417, "step": 20 }, { "epoch": 0.1488095238095238, "grad_norm": 124.88711547851562, "learning_rate": 2.5e-06, "log_odds_chosen": 0.21610824763774872, "log_odds_ratio": -0.6467095613479614, "logits/chosen": 337.0777587890625, "logits/rejected": 361.3624267578125, "logps/chosen": -2.110491991043091, "logps/rejected": -2.2703230381011963, "loss": 2.9167, "nll_loss": 2.676313638687134, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.0552459955215454, "rewards/margins": 0.07991557568311691, "rewards/rejected": -1.1351615190505981, "step": 25 }, { "epoch": 0.17857142857142858, "grad_norm": 120.593994140625, "learning_rate": 3e-06, "log_odds_chosen": 0.2517230808734894, "log_odds_ratio": -0.639795184135437, "logits/chosen": 322.35894775390625, "logits/rejected": 409.290771484375, "logps/chosen": -2.053133964538574, "logps/rejected": -2.262042999267578, "loss": 2.4846, "nll_loss": 2.1683788299560547, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.026566982269287, "rewards/margins": 0.10445437580347061, "rewards/rejected": -1.131021499633789, "step": 30 }, { "epoch": 0.20833333333333334, "grad_norm": 45.12126541137695, "learning_rate": 3.5e-06, "log_odds_chosen": 0.5470780730247498, "log_odds_ratio": -0.5346588492393494, "logits/chosen": 374.33734130859375, "logits/rejected": 398.3028259277344, "logps/chosen": -1.4850542545318604, "logps/rejected": -1.9500999450683594, "loss": 2.3067, "nll_loss": 1.7573463916778564, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7425271272659302, "rewards/margins": 0.23252280056476593, "rewards/rejected": -0.9750499725341797, "step": 35 }, { "epoch": 0.23809523809523808, "grad_norm": 34.55175018310547, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.48626986145973206, "log_odds_ratio": -0.5875475406646729, "logits/chosen": 343.8048400878906, "logits/rejected": 384.3130187988281, "logps/chosen": -1.6461362838745117, "logps/rejected": -2.0677056312561035, "loss": 2.2439, "nll_loss": 1.988368272781372, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8230681419372559, "rewards/margins": 0.2107846736907959, "rewards/rejected": -1.0338528156280518, "step": 40 }, { "epoch": 0.26785714285714285, "grad_norm": 45.629512786865234, "learning_rate": 4.5e-06, "log_odds_chosen": -0.03384453058242798, "log_odds_ratio": -0.7663249969482422, "logits/chosen": 391.72265625, "logits/rejected": 376.98858642578125, "logps/chosen": -1.8141120672225952, "logps/rejected": -1.77732253074646, "loss": 2.1803, "nll_loss": 1.894079566001892, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.9070560336112976, "rewards/margins": -0.018394792452454567, "rewards/rejected": -0.88866126537323, "step": 45 }, { "epoch": 0.2976190476190476, "grad_norm": 47.596744537353516, "learning_rate": 5e-06, "log_odds_chosen": 0.6274509429931641, "log_odds_ratio": -0.5308854579925537, "logits/chosen": 436.2086486816406, "logits/rejected": 442.98974609375, "logps/chosen": -1.5697168111801147, "logps/rejected": -2.105163097381592, "loss": 2.1885, "nll_loss": 2.0384533405303955, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7848584055900574, "rewards/margins": 0.26772308349609375, "rewards/rejected": -1.052581548690796, "step": 50 }, { "epoch": 0.3273809523809524, "grad_norm": 35.20968246459961, "learning_rate": 5.500000000000001e-06, "log_odds_chosen": 0.3415950834751129, "log_odds_ratio": -0.6474028825759888, "logits/chosen": 370.7908020019531, "logits/rejected": 369.24395751953125, "logps/chosen": -1.539645791053772, "logps/rejected": -1.8215446472167969, "loss": 2.2186, "nll_loss": 1.9263942241668701, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.769822895526886, "rewards/margins": 0.14094945788383484, "rewards/rejected": -0.9107723236083984, "step": 55 }, { "epoch": 0.35714285714285715, "grad_norm": 56.7140998840332, "learning_rate": 6e-06, "log_odds_chosen": 0.23950794339179993, "log_odds_ratio": -0.634242057800293, "logits/chosen": 393.73089599609375, "logits/rejected": 364.10833740234375, "logps/chosen": -1.4558594226837158, "logps/rejected": -1.6270105838775635, "loss": 2.0389, "nll_loss": 2.0331270694732666, "rewards/accuracies": 0.5, "rewards/chosen": -0.7279297113418579, "rewards/margins": 0.08557556569576263, "rewards/rejected": -0.8135052919387817, "step": 60 }, { "epoch": 0.3869047619047619, "grad_norm": 35.615726470947266, "learning_rate": 6.5000000000000004e-06, "log_odds_chosen": 0.3183743357658386, "log_odds_ratio": -0.6254990696907043, "logits/chosen": 390.70111083984375, "logits/rejected": 394.5395812988281, "logps/chosen": -1.4384119510650635, "logps/rejected": -1.6543052196502686, "loss": 1.9284, "nll_loss": 1.6583305597305298, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7192059755325317, "rewards/margins": 0.10794667154550552, "rewards/rejected": -0.8271526098251343, "step": 65 }, { "epoch": 0.4166666666666667, "grad_norm": 39.04438018798828, "learning_rate": 7e-06, "log_odds_chosen": 0.40005454421043396, "log_odds_ratio": -0.5705705881118774, "logits/chosen": 397.2193603515625, "logits/rejected": 392.36126708984375, "logps/chosen": -1.761604905128479, "logps/rejected": -2.098273515701294, "loss": 2.0169, "nll_loss": 1.894805908203125, "rewards/accuracies": 0.75, "rewards/chosen": -0.8808024525642395, "rewards/margins": 0.16833437979221344, "rewards/rejected": -1.049136757850647, "step": 70 }, { "epoch": 0.44642857142857145, "grad_norm": 81.70245361328125, "learning_rate": 7.500000000000001e-06, "log_odds_chosen": 0.6616519689559937, "log_odds_ratio": -0.5056720972061157, "logits/chosen": 399.26202392578125, "logits/rejected": 387.3080139160156, "logps/chosen": -1.2724144458770752, "logps/rejected": -1.7848689556121826, "loss": 1.9258, "nll_loss": 1.534188985824585, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6362072229385376, "rewards/margins": 0.2562272548675537, "rewards/rejected": -0.8924344778060913, "step": 75 }, { "epoch": 0.47619047619047616, "grad_norm": 50.36211395263672, "learning_rate": 8.000000000000001e-06, "log_odds_chosen": 0.6271190643310547, "log_odds_ratio": -0.5201026797294617, "logits/chosen": 406.4871520996094, "logits/rejected": 419.87884521484375, "logps/chosen": -1.585078239440918, "logps/rejected": -2.038538694381714, "loss": 2.0136, "nll_loss": 1.7463384866714478, "rewards/accuracies": 0.75, "rewards/chosen": -0.792539119720459, "rewards/margins": 0.22673015296459198, "rewards/rejected": -1.019269347190857, "step": 80 }, { "epoch": 0.5059523809523809, "grad_norm": 63.27934265136719, "learning_rate": 8.5e-06, "log_odds_chosen": -0.1369583010673523, "log_odds_ratio": -0.9432764053344727, "logits/chosen": 416.71893310546875, "logits/rejected": 396.90789794921875, "logps/chosen": -2.0069127082824707, "logps/rejected": -1.7957664728164673, "loss": 2.0904, "nll_loss": 2.148869514465332, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0034563541412354, "rewards/margins": -0.10557299852371216, "rewards/rejected": -0.8978832364082336, "step": 85 }, { "epoch": 0.5357142857142857, "grad_norm": 43.4862060546875, "learning_rate": 9e-06, "log_odds_chosen": 1.0741676092147827, "log_odds_ratio": -0.4483945965766907, "logits/chosen": 430.83160400390625, "logits/rejected": 437.00006103515625, "logps/chosen": -1.3649173974990845, "logps/rejected": -2.2547924518585205, "loss": 1.8406, "nll_loss": 1.6345653533935547, "rewards/accuracies": 0.75, "rewards/chosen": -0.6824586987495422, "rewards/margins": 0.444937527179718, "rewards/rejected": -1.1273962259292603, "step": 90 }, { "epoch": 0.5654761904761905, "grad_norm": 158.6744384765625, "learning_rate": 9.5e-06, "log_odds_chosen": 0.3580256700515747, "log_odds_ratio": -0.5983911752700806, "logits/chosen": 365.0293273925781, "logits/rejected": 395.13739013671875, "logps/chosen": -1.2539499998092651, "logps/rejected": -1.4669833183288574, "loss": 1.9122, "nll_loss": 1.5921684503555298, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6269749999046326, "rewards/margins": 0.10651664435863495, "rewards/rejected": -0.7334916591644287, "step": 95 }, { "epoch": 0.5952380952380952, "grad_norm": 60.67824172973633, "learning_rate": 1e-05, "log_odds_chosen": 0.5569584965705872, "log_odds_ratio": -0.5973536372184753, "logits/chosen": 427.83270263671875, "logits/rejected": 444.78289794921875, "logps/chosen": -1.552487850189209, "logps/rejected": -2.052288770675659, "loss": 1.9997, "nll_loss": 1.694284439086914, "rewards/accuracies": 0.75, "rewards/chosen": -0.7762439250946045, "rewards/margins": 0.2499004304409027, "rewards/rejected": -1.0261443853378296, "step": 100 }, { "epoch": 0.625, "grad_norm": 66.58687591552734, "learning_rate": 9.759000729485331e-06, "log_odds_chosen": 0.6742503643035889, "log_odds_ratio": -0.5492368340492249, "logits/chosen": 421.02435302734375, "logits/rejected": 404.02593994140625, "logps/chosen": -1.4385154247283936, "logps/rejected": -1.9352591037750244, "loss": 1.9337, "nll_loss": 1.8405609130859375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7192577123641968, "rewards/margins": 0.24837179481983185, "rewards/rejected": -0.9676295518875122, "step": 105 }, { "epoch": 0.6547619047619048, "grad_norm": 378.0513916015625, "learning_rate": 9.534625892455923e-06, "log_odds_chosen": 0.7585327625274658, "log_odds_ratio": -0.467681884765625, "logits/chosen": 386.55181884765625, "logits/rejected": 365.63861083984375, "logps/chosen": -1.359151840209961, "logps/rejected": -1.9564090967178345, "loss": 2.0541, "nll_loss": 1.9982373714447021, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6795759201049805, "rewards/margins": 0.29862865805625916, "rewards/rejected": -0.9782045483589172, "step": 110 }, { "epoch": 0.6845238095238095, "grad_norm": 68.57817077636719, "learning_rate": 9.325048082403139e-06, "log_odds_chosen": 0.9356454610824585, "log_odds_ratio": -0.41365212202072144, "logits/chosen": 406.5351867675781, "logits/rejected": 452.47161865234375, "logps/chosen": -1.2247552871704102, "logps/rejected": -1.8875564336776733, "loss": 2.0096, "nll_loss": 1.6129186153411865, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6123776435852051, "rewards/margins": 0.33140069246292114, "rewards/rejected": -0.9437782168388367, "step": 115 }, { "epoch": 0.7142857142857143, "grad_norm": 70.14920806884766, "learning_rate": 9.12870929175277e-06, "log_odds_chosen": 0.6218617558479309, "log_odds_ratio": -0.5581934452056885, "logits/chosen": 400.44232177734375, "logits/rejected": 403.1569519042969, "logps/chosen": -1.2707955837249756, "logps/rejected": -1.6316139698028564, "loss": 2.0261, "nll_loss": 1.7148948907852173, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6353977918624878, "rewards/margins": 0.18040914833545685, "rewards/rejected": -0.8158069849014282, "step": 120 }, { "epoch": 0.7440476190476191, "grad_norm": 28.353559494018555, "learning_rate": 8.94427190999916e-06, "log_odds_chosen": 0.06747283786535263, "log_odds_ratio": -0.7747208476066589, "logits/chosen": 394.8547668457031, "logits/rejected": 402.99114990234375, "logps/chosen": -1.4064565896987915, "logps/rejected": -1.443865180015564, "loss": 1.9533, "nll_loss": 1.529036283493042, "rewards/accuracies": 0.5, "rewards/chosen": -0.7032282948493958, "rewards/margins": 0.018704283982515335, "rewards/rejected": -0.721932590007782, "step": 125 }, { "epoch": 0.7738095238095238, "grad_norm": 50.000038146972656, "learning_rate": 8.770580193070294e-06, "log_odds_chosen": 0.487846702337265, "log_odds_ratio": -0.62076336145401, "logits/chosen": 419.7767639160156, "logits/rejected": 387.2704772949219, "logps/chosen": -1.2864845991134644, "logps/rejected": -1.5687696933746338, "loss": 1.9848, "nll_loss": 1.5705018043518066, "rewards/accuracies": 0.5, "rewards/chosen": -0.6432422995567322, "rewards/margins": 0.14114244282245636, "rewards/rejected": -0.7843848466873169, "step": 130 }, { "epoch": 0.8035714285714286, "grad_norm": 32.915321350097656, "learning_rate": 8.606629658238705e-06, "log_odds_chosen": 0.5996901988983154, "log_odds_ratio": -0.5842069387435913, "logits/chosen": 392.77679443359375, "logits/rejected": 423.60272216796875, "logps/chosen": -1.3194632530212402, "logps/rejected": -1.8708839416503906, "loss": 1.9914, "nll_loss": 1.6256046295166016, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6597316265106201, "rewards/margins": 0.2757102847099304, "rewards/rejected": -0.9354419708251953, "step": 135 }, { "epoch": 0.8333333333333334, "grad_norm": 60.51993179321289, "learning_rate": 8.451542547285167e-06, "log_odds_chosen": 0.14726313948631287, "log_odds_ratio": -0.6693505048751831, "logits/chosen": 411.6986389160156, "logits/rejected": 375.326416015625, "logps/chosen": -1.321712851524353, "logps/rejected": -1.4512436389923096, "loss": 1.9797, "nll_loss": 1.6686627864837646, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6608564257621765, "rewards/margins": 0.0647653192281723, "rewards/rejected": -0.7256218194961548, "step": 140 }, { "epoch": 0.8630952380952381, "grad_norm": 31.022499084472656, "learning_rate": 8.304547985373997e-06, "log_odds_chosen": 0.1282106339931488, "log_odds_ratio": -0.6646836996078491, "logits/chosen": 381.526611328125, "logits/rejected": 395.548095703125, "logps/chosen": -1.3501091003417969, "logps/rejected": -1.4500309228897095, "loss": 1.9768, "nll_loss": 1.590570330619812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6750545501708984, "rewards/margins": 0.0499609112739563, "rewards/rejected": -0.7250154614448547, "step": 145 }, { "epoch": 0.8928571428571429, "grad_norm": 48.54278564453125, "learning_rate": 8.164965809277262e-06, "log_odds_chosen": 1.1463629007339478, "log_odds_ratio": -0.46680259704589844, "logits/chosen": 403.89031982421875, "logits/rejected": 409.86151123046875, "logps/chosen": -1.3419028520584106, "logps/rejected": -2.324528217315674, "loss": 1.901, "nll_loss": 1.6430673599243164, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6709514260292053, "rewards/margins": 0.49131274223327637, "rewards/rejected": -1.162264108657837, "step": 150 }, { "epoch": 0.9226190476190477, "grad_norm": 44.35458755493164, "learning_rate": 8.03219328902499e-06, "log_odds_chosen": 0.22458314895629883, "log_odds_ratio": -0.6567160487174988, "logits/chosen": 399.331298828125, "logits/rejected": 395.67767333984375, "logps/chosen": -1.445773959159851, "logps/rejected": -1.61488938331604, "loss": 1.8732, "nll_loss": 1.569331407546997, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7228869795799255, "rewards/margins": 0.08455771207809448, "rewards/rejected": -0.80744469165802, "step": 155 }, { "epoch": 0.9523809523809523, "grad_norm": 44.52214431762695, "learning_rate": 7.905694150420949e-06, "log_odds_chosen": 0.3395001292228699, "log_odds_ratio": -0.6508662104606628, "logits/chosen": 404.9471435546875, "logits/rejected": 443.93865966796875, "logps/chosen": -1.2228091955184937, "logps/rejected": -1.4965049028396606, "loss": 1.8769, "nll_loss": 1.4934804439544678, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.6114045977592468, "rewards/margins": 0.13684777915477753, "rewards/rejected": -0.7482524514198303, "step": 160 }, { "epoch": 0.9821428571428571, "grad_norm": 51.537784576416016, "learning_rate": 7.78498944161523e-06, "log_odds_chosen": 0.2777930200099945, "log_odds_ratio": -0.6248766779899597, "logits/chosen": 440.94366455078125, "logits/rejected": 444.90240478515625, "logps/chosen": -1.3909051418304443, "logps/rejected": -1.5702444314956665, "loss": 1.9721, "nll_loss": 1.7042747735977173, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6954525709152222, "rewards/margins": 0.08966972678899765, "rewards/rejected": -0.7851222157478333, "step": 165 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.26492369174957275, "eval_log_odds_ratio": -0.6573201417922974, "eval_logits/chosen": 336.2867126464844, "eval_logits/rejected": 282.1214904785156, "eval_logps/chosen": -1.2143747806549072, "eval_logps/rejected": -1.4054285287857056, "eval_loss": 1.9526195526123047, "eval_nll_loss": 1.6514703035354614, "eval_rewards/accuracies": 0.5571428537368774, "eval_rewards/chosen": -0.6071873903274536, "eval_rewards/margins": 0.09552692621946335, "eval_rewards/rejected": -0.7027142643928528, "eval_runtime": 201.1918, "eval_samples_per_second": 2.749, "eval_steps_per_second": 0.348, "step": 168 }, { "epoch": 1.0119047619047619, "grad_norm": 31.192705154418945, "learning_rate": 7.669649888473705e-06, "log_odds_chosen": 0.9334543347358704, "log_odds_ratio": -0.48719945549964905, "logits/chosen": 377.3502502441406, "logits/rejected": 415.46356201171875, "logps/chosen": -1.0376964807510376, "logps/rejected": -1.7006464004516602, "loss": 1.7058, "nll_loss": 1.2853295803070068, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5188482403755188, "rewards/margins": 0.3314751386642456, "rewards/rejected": -0.8503232002258301, "step": 170 }, { "epoch": 1.0416666666666667, "grad_norm": 23.733516693115234, "learning_rate": 7.559289460184545e-06, "log_odds_chosen": 1.4257056713104248, "log_odds_ratio": -0.36321204900741577, "logits/chosen": 406.3605651855469, "logits/rejected": 437.1375427246094, "logps/chosen": -0.8629204034805298, "logps/rejected": -1.8850151300430298, "loss": 1.2977, "nll_loss": 1.387385368347168, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4314602017402649, "rewards/margins": 0.5110472440719604, "rewards/rejected": -0.9425075650215149, "step": 175 }, { "epoch": 1.0714285714285714, "grad_norm": 24.075864791870117, "learning_rate": 7.4535599249993e-06, "log_odds_chosen": 1.4416855573654175, "log_odds_ratio": -0.30401644110679626, "logits/chosen": 357.9011535644531, "logits/rejected": 352.5140380859375, "logps/chosen": -0.899684727191925, "logps/rejected": -1.9276078939437866, "loss": 1.3288, "nll_loss": 1.3482264280319214, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4498423635959625, "rewards/margins": 0.513961672782898, "rewards/rejected": -0.9638039469718933, "step": 180 }, { "epoch": 1.1011904761904763, "grad_norm": 39.60210418701172, "learning_rate": 7.352146220938079e-06, "log_odds_chosen": 2.3201422691345215, "log_odds_ratio": -0.20258066058158875, "logits/chosen": 449.66485595703125, "logits/rejected": 380.39923095703125, "logps/chosen": -0.8746849894523621, "logps/rejected": -2.625277042388916, "loss": 1.2651, "nll_loss": 1.2770421504974365, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.43734249472618103, "rewards/margins": 0.8752959370613098, "rewards/rejected": -1.312638521194458, "step": 185 }, { "epoch": 1.130952380952381, "grad_norm": 19.941560745239258, "learning_rate": 7.254762501100117e-06, "log_odds_chosen": 1.2973986864089966, "log_odds_ratio": -0.36347365379333496, "logits/chosen": 403.78106689453125, "logits/rejected": 423.5147399902344, "logps/chosen": -0.9356532096862793, "logps/rejected": -1.7905277013778687, "loss": 1.3584, "nll_loss": 1.2576459646224976, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.46782660484313965, "rewards/margins": 0.4274372160434723, "rewards/rejected": -0.8952638506889343, "step": 190 }, { "epoch": 1.1607142857142858, "grad_norm": 22.545778274536133, "learning_rate": 7.1611487403943295e-06, "log_odds_chosen": 1.494046688079834, "log_odds_ratio": -0.28673815727233887, "logits/chosen": 383.939697265625, "logits/rejected": 414.34100341796875, "logps/chosen": -1.043168067932129, "logps/rejected": -2.1456902027130127, "loss": 1.3101, "nll_loss": 1.2422401905059814, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5215840339660645, "rewards/margins": 0.5512610673904419, "rewards/rejected": -1.0728451013565063, "step": 195 }, { "epoch": 1.1904761904761905, "grad_norm": 29.15802574157715, "learning_rate": 7.0710678118654756e-06, "log_odds_chosen": 2.355477809906006, "log_odds_ratio": -0.23161384463310242, "logits/chosen": 443.63848876953125, "logits/rejected": 404.1456298828125, "logps/chosen": -0.6128617525100708, "logps/rejected": -2.293651580810547, "loss": 1.2885, "nll_loss": 1.0000107288360596, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3064308762550354, "rewards/margins": 0.8403949737548828, "rewards/rejected": -1.1468257904052734, "step": 200 }, { "epoch": 1.2202380952380953, "grad_norm": 25.433277130126953, "learning_rate": 6.984302957695783e-06, "log_odds_chosen": 2.206387758255005, "log_odds_ratio": -0.20685645937919617, "logits/chosen": 345.31134033203125, "logits/rejected": 396.1018371582031, "logps/chosen": -0.8278995752334595, "logps/rejected": -2.5073094367980957, "loss": 1.2908, "nll_loss": 1.2252819538116455, "rewards/accuracies": 1.0, "rewards/chosen": -0.41394978761672974, "rewards/margins": 0.8397049903869629, "rewards/rejected": -1.2536547183990479, "step": 205 }, { "epoch": 1.25, "grad_norm": 20.637460708618164, "learning_rate": 6.900655593423542e-06, "log_odds_chosen": 1.841051697731018, "log_odds_ratio": -0.25199171900749207, "logits/chosen": 375.1767272949219, "logits/rejected": 373.2859191894531, "logps/chosen": -0.8490890264511108, "logps/rejected": -2.084282159805298, "loss": 1.1767, "nll_loss": 1.2052141427993774, "rewards/accuracies": 1.0, "rewards/chosen": -0.4245445132255554, "rewards/margins": 0.617596447467804, "rewards/rejected": -1.042141079902649, "step": 210 }, { "epoch": 1.2797619047619047, "grad_norm": 44.84423065185547, "learning_rate": 6.819943394704736e-06, "log_odds_chosen": 2.0608296394348145, "log_odds_ratio": -0.26352304220199585, "logits/chosen": 417.9048767089844, "logits/rejected": 423.32275390625, "logps/chosen": -0.8630622029304504, "logps/rejected": -2.449791669845581, "loss": 1.2804, "nll_loss": 1.2531594038009644, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4315311014652252, "rewards/margins": 0.7933648824691772, "rewards/rejected": -1.2248958349227905, "step": 215 }, { "epoch": 1.3095238095238095, "grad_norm": 27.052371978759766, "learning_rate": 6.741998624632421e-06, "log_odds_chosen": 1.9642305374145508, "log_odds_ratio": -0.2985631823539734, "logits/chosen": 365.51226806640625, "logits/rejected": 383.2918395996094, "logps/chosen": -0.7986623048782349, "logps/rejected": -2.1426806449890137, "loss": 1.2905, "nll_loss": 1.1290483474731445, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.39933115243911743, "rewards/margins": 0.6720091700553894, "rewards/rejected": -1.0713403224945068, "step": 220 }, { "epoch": 1.3392857142857144, "grad_norm": 33.49964141845703, "learning_rate": 6.666666666666667e-06, "log_odds_chosen": 1.2250487804412842, "log_odds_ratio": -0.34452176094055176, "logits/chosen": 399.16046142578125, "logits/rejected": 376.3241882324219, "logps/chosen": -1.1176211833953857, "logps/rejected": -2.0609307289123535, "loss": 1.2819, "nll_loss": 1.3445219993591309, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5588105916976929, "rewards/margins": 0.47165459394454956, "rewards/rejected": -1.0304653644561768, "step": 225 }, { "epoch": 1.369047619047619, "grad_norm": 20.881301879882812, "learning_rate": 6.593804733957872e-06, "log_odds_chosen": 2.1563384532928467, "log_odds_ratio": -0.24819080531597137, "logits/chosen": 367.4872131347656, "logits/rejected": 376.99822998046875, "logps/chosen": -0.9552156329154968, "logps/rejected": -2.5535261631011963, "loss": 1.2569, "nll_loss": 0.9765374064445496, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4776078164577484, "rewards/margins": 0.7991552948951721, "rewards/rejected": -1.2767630815505981, "step": 230 }, { "epoch": 1.3988095238095237, "grad_norm": 20.453393936157227, "learning_rate": 6.523280730534423e-06, "log_odds_chosen": 2.148529529571533, "log_odds_ratio": -0.2209801971912384, "logits/chosen": 396.610595703125, "logits/rejected": 402.3414306640625, "logps/chosen": -0.6876020431518555, "logps/rejected": -2.1799914836883545, "loss": 1.2833, "nll_loss": 1.0352197885513306, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.34380102157592773, "rewards/margins": 0.7461946606636047, "rewards/rejected": -1.0899957418441772, "step": 235 }, { "epoch": 1.4285714285714286, "grad_norm": 23.43726348876953, "learning_rate": 6.4549722436790284e-06, "log_odds_chosen": 1.4042203426361084, "log_odds_ratio": -0.30760836601257324, "logits/chosen": 426.6971740722656, "logits/rejected": 434.68292236328125, "logps/chosen": -0.9825772047042847, "logps/rejected": -2.0324692726135254, "loss": 1.258, "nll_loss": 1.1937551498413086, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.49128860235214233, "rewards/margins": 0.5249461531639099, "rewards/rejected": -1.0162346363067627, "step": 240 }, { "epoch": 1.4583333333333333, "grad_norm": 20.512117385864258, "learning_rate": 6.3887656499994e-06, "log_odds_chosen": 1.7208242416381836, "log_odds_ratio": -0.2485727071762085, "logits/chosen": 406.27337646484375, "logits/rejected": 381.36297607421875, "logps/chosen": -0.7839127779006958, "logps/rejected": -2.025153398513794, "loss": 1.3187, "nll_loss": 1.2436693906784058, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3919563889503479, "rewards/margins": 0.6206203699111938, "rewards/rejected": -1.012576699256897, "step": 245 }, { "epoch": 1.4880952380952381, "grad_norm": 31.212268829345703, "learning_rate": 6.324555320336759e-06, "log_odds_chosen": 2.171504497528076, "log_odds_ratio": -0.2626289427280426, "logits/chosen": 385.3293151855469, "logits/rejected": 403.4792785644531, "logps/chosen": -0.912436842918396, "logps/rejected": -2.581498146057129, "loss": 1.2409, "nll_loss": 0.9795435070991516, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.456218421459198, "rewards/margins": 0.8345306515693665, "rewards/rejected": -1.2907490730285645, "step": 250 }, { "epoch": 1.5178571428571428, "grad_norm": 15.0794095993042, "learning_rate": 6.262242910851496e-06, "log_odds_chosen": 1.983006238937378, "log_odds_ratio": -0.19495443999767303, "logits/chosen": 411.68487548828125, "logits/rejected": 416.602783203125, "logps/chosen": -0.9126818776130676, "logps/rejected": -2.4313483238220215, "loss": 1.2696, "nll_loss": 1.102388620376587, "rewards/accuracies": 1.0, "rewards/chosen": -0.4563409388065338, "rewards/margins": 0.7593332529067993, "rewards/rejected": -1.2156741619110107, "step": 255 }, { "epoch": 1.5476190476190477, "grad_norm": 27.49896812438965, "learning_rate": 6.2017367294604225e-06, "log_odds_chosen": 1.9481725692749023, "log_odds_ratio": -0.2753888964653015, "logits/chosen": 386.07427978515625, "logits/rejected": 435.198974609375, "logps/chosen": -0.995970606803894, "logps/rejected": -2.5512473583221436, "loss": 1.2264, "nll_loss": 1.1618086099624634, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.497985303401947, "rewards/margins": 0.7776384353637695, "rewards/rejected": -1.2756236791610718, "step": 260 }, { "epoch": 1.5773809523809523, "grad_norm": 23.22437286376953, "learning_rate": 6.142951168339513e-06, "log_odds_chosen": 1.3944361209869385, "log_odds_ratio": -0.4270511567592621, "logits/chosen": 435.14678955078125, "logits/rejected": 427.51226806640625, "logps/chosen": -1.104048252105713, "logps/rejected": -2.160891056060791, "loss": 1.4157, "nll_loss": 1.3079249858856201, "rewards/accuracies": 0.75, "rewards/chosen": -0.5520241260528564, "rewards/margins": 0.5284214019775391, "rewards/rejected": -1.0804455280303955, "step": 265 }, { "epoch": 1.6071428571428572, "grad_norm": 16.965179443359375, "learning_rate": 6.0858061945018455e-06, "log_odds_chosen": 1.3110793828964233, "log_odds_ratio": -0.33642929792404175, "logits/chosen": 370.2662658691406, "logits/rejected": 376.83587646484375, "logps/chosen": -1.1280148029327393, "logps/rejected": -2.0509979724884033, "loss": 1.3064, "nll_loss": 1.3204824924468994, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.5640074014663696, "rewards/margins": 0.46149152517318726, "rewards/rejected": -1.0254989862442017, "step": 270 }, { "epoch": 1.6369047619047619, "grad_norm": 16.231523513793945, "learning_rate": 6.030226891555273e-06, "log_odds_chosen": 1.2828176021575928, "log_odds_ratio": -0.31571871042251587, "logits/chosen": 336.78143310546875, "logits/rejected": 352.0708312988281, "logps/chosen": -0.8390641212463379, "logps/rejected": -1.6753209829330444, "loss": 1.2993, "nll_loss": 1.0436512231826782, "rewards/accuracies": 1.0, "rewards/chosen": -0.41953206062316895, "rewards/margins": 0.4181283414363861, "rewards/rejected": -0.8376604914665222, "step": 275 }, { "epoch": 1.6666666666666665, "grad_norm": 19.623210906982422, "learning_rate": 5.976143046671968e-06, "log_odds_chosen": 1.4294779300689697, "log_odds_ratio": -0.3383074104785919, "logits/chosen": 406.8611145019531, "logits/rejected": 369.0583190917969, "logps/chosen": -0.7519195675849915, "logps/rejected": -1.7197290658950806, "loss": 1.3075, "nll_loss": 1.0520720481872559, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3759597837924957, "rewards/margins": 0.4839046597480774, "rewards/rejected": -0.8598645329475403, "step": 280 }, { "epoch": 1.6964285714285714, "grad_norm": 27.15911293029785, "learning_rate": 5.923488777590924e-06, "log_odds_chosen": 1.6469166278839111, "log_odds_ratio": -0.31998661160469055, "logits/chosen": 351.8115234375, "logits/rejected": 378.2767333984375, "logps/chosen": -0.8496532440185547, "logps/rejected": -2.034947156906128, "loss": 1.2238, "nll_loss": 1.0820589065551758, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.42482662200927734, "rewards/margins": 0.5926468968391418, "rewards/rejected": -1.017473578453064, "step": 285 }, { "epoch": 1.7261904761904763, "grad_norm": 20.430383682250977, "learning_rate": 5.8722021951470355e-06, "log_odds_chosen": 2.2868614196777344, "log_odds_ratio": -0.23290471732616425, "logits/chosen": 376.9328918457031, "logits/rejected": 444.3408203125, "logps/chosen": -0.8962064981460571, "logps/rejected": -2.7214858531951904, "loss": 1.2798, "nll_loss": 1.1513203382492065, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.44810324907302856, "rewards/margins": 0.9126396179199219, "rewards/rejected": -1.3607429265975952, "step": 290 }, { "epoch": 1.755952380952381, "grad_norm": 23.614900588989258, "learning_rate": 5.822225097395821e-06, "log_odds_chosen": 2.100496768951416, "log_odds_ratio": -0.2871156930923462, "logits/chosen": 391.25726318359375, "logits/rejected": 399.76214599609375, "logps/chosen": -1.0318877696990967, "logps/rejected": -2.5897607803344727, "loss": 1.318, "nll_loss": 1.339220404624939, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5159438848495483, "rewards/margins": 0.778936505317688, "rewards/rejected": -1.2948803901672363, "step": 295 }, { "epoch": 1.7857142857142856, "grad_norm": 24.594526290893555, "learning_rate": 5.773502691896259e-06, "log_odds_chosen": 1.4617587327957153, "log_odds_ratio": -0.268736869096756, "logits/chosen": 392.9894714355469, "logits/rejected": 392.593994140625, "logps/chosen": -0.716581404209137, "logps/rejected": -1.6443252563476562, "loss": 1.2342, "nll_loss": 1.0071418285369873, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3582907021045685, "rewards/margins": 0.46387186646461487, "rewards/rejected": -0.8221626281738281, "step": 300 }, { "epoch": 1.8154761904761905, "grad_norm": 18.39592742919922, "learning_rate": 5.725983343138682e-06, "log_odds_chosen": 1.3188327550888062, "log_odds_ratio": -0.39420002698898315, "logits/chosen": 443.49505615234375, "logits/rejected": 423.3663635253906, "logps/chosen": -1.0477676391601562, "logps/rejected": -2.009697914123535, "loss": 1.2633, "nll_loss": 1.2299957275390625, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5238838195800781, "rewards/margins": 0.4809652864933014, "rewards/rejected": -1.0048489570617676, "step": 305 }, { "epoch": 1.8452380952380953, "grad_norm": 16.925289154052734, "learning_rate": 5.679618342470648e-06, "log_odds_chosen": 1.8564685583114624, "log_odds_ratio": -0.2928754985332489, "logits/chosen": 358.899169921875, "logits/rejected": 364.58160400390625, "logps/chosen": -0.94548100233078, "logps/rejected": -2.3730039596557617, "loss": 1.2961, "nll_loss": 1.2056444883346558, "rewards/accuracies": 1.0, "rewards/chosen": -0.47274050116539, "rewards/margins": 0.7137616276741028, "rewards/rejected": -1.1865019798278809, "step": 310 }, { "epoch": 1.875, "grad_norm": 27.0323543548584, "learning_rate": 5.63436169819011e-06, "log_odds_chosen": 1.889154076576233, "log_odds_ratio": -0.3951663076877594, "logits/chosen": 385.2926025390625, "logits/rejected": 391.605712890625, "logps/chosen": -1.166025161743164, "logps/rejected": -2.7089405059814453, "loss": 1.3311, "nll_loss": 1.3940035104751587, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.583012580871582, "rewards/margins": 0.7714576125144958, "rewards/rejected": -1.3544702529907227, "step": 315 }, { "epoch": 1.9047619047619047, "grad_norm": 18.699766159057617, "learning_rate": 5.590169943749475e-06, "log_odds_chosen": 1.6885192394256592, "log_odds_ratio": -0.2244713306427002, "logits/chosen": 402.7138366699219, "logits/rejected": 435.62646484375, "logps/chosen": -0.8159521222114563, "logps/rejected": -2.033271551132202, "loss": 1.2117, "nll_loss": 0.9898951649665833, "rewards/accuracies": 1.0, "rewards/chosen": -0.40797606110572815, "rewards/margins": 0.6086598634719849, "rewards/rejected": -1.016635775566101, "step": 320 }, { "epoch": 1.9345238095238095, "grad_norm": 29.9803409576416, "learning_rate": 5.547001962252292e-06, "log_odds_chosen": 2.429278612136841, "log_odds_ratio": -0.19841960072517395, "logits/chosen": 411.4713439941406, "logits/rejected": 433.3556213378906, "logps/chosen": -0.8200858235359192, "logps/rejected": -2.58778977394104, "loss": 1.1848, "nll_loss": 1.0703952312469482, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4100429117679596, "rewards/margins": 0.8838518857955933, "rewards/rejected": -1.29389488697052, "step": 325 }, { "epoch": 1.9642857142857144, "grad_norm": 15.92293643951416, "learning_rate": 5.504818825631804e-06, "log_odds_chosen": 1.846983551979065, "log_odds_ratio": -0.29716789722442627, "logits/chosen": 407.4903869628906, "logits/rejected": 413.0613708496094, "logps/chosen": -0.9037710428237915, "logps/rejected": -2.2958996295928955, "loss": 1.245, "nll_loss": 0.9782114028930664, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.45188552141189575, "rewards/margins": 0.6960643529891968, "rewards/rejected": -1.1479498147964478, "step": 330 }, { "epoch": 1.994047619047619, "grad_norm": 15.895071029663086, "learning_rate": 5.4635836470815305e-06, "log_odds_chosen": 1.2502014636993408, "log_odds_ratio": -0.3804013133049011, "logits/chosen": 390.8824157714844, "logits/rejected": 404.8577575683594, "logps/chosen": -1.2015631198883057, "logps/rejected": -2.143710136413574, "loss": 1.3299, "nll_loss": 1.439396619796753, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6007815599441528, "rewards/margins": 0.47107353806495667, "rewards/rejected": -1.071855068206787, "step": 335 }, { "epoch": 2.0, "eval_log_odds_chosen": 0.2173391729593277, "eval_log_odds_ratio": -0.6792100071907043, "eval_logits/chosen": 345.2332763671875, "eval_logits/rejected": 293.2820129394531, "eval_logps/chosen": -1.1971725225448608, "eval_logps/rejected": -1.3610831499099731, "eval_loss": 1.9015214443206787, "eval_nll_loss": 1.5933443307876587, "eval_rewards/accuracies": 0.5, "eval_rewards/chosen": -0.5985862612724304, "eval_rewards/margins": 0.08195527642965317, "eval_rewards/rejected": -0.6805415749549866, "eval_runtime": 201.3424, "eval_samples_per_second": 2.747, "eval_steps_per_second": 0.348, "step": 336 }, { "epoch": 2.0238095238095237, "grad_norm": 18.064809799194336, "learning_rate": 5.423261445466404e-06, "log_odds_chosen": 3.1630072593688965, "log_odds_ratio": -0.14342114329338074, "logits/chosen": 407.103759765625, "logits/rejected": 396.9122009277344, "logps/chosen": -0.5497515201568604, "logps/rejected": -2.797853946685791, "loss": 0.7506, "nll_loss": 0.7164371609687805, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2748757600784302, "rewards/margins": 1.1240513324737549, "rewards/rejected": -1.3989269733428955, "step": 340 }, { "epoch": 2.0535714285714284, "grad_norm": 12.874629974365234, "learning_rate": 5.383819020581656e-06, "log_odds_chosen": 4.300416946411133, "log_odds_ratio": -0.10298861563205719, "logits/chosen": 406.04144287109375, "logits/rejected": 365.400390625, "logps/chosen": -0.5357804894447327, "logps/rejected": -3.574709415435791, "loss": 0.6675, "nll_loss": 0.8001909255981445, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26789024472236633, "rewards/margins": 1.5194646120071411, "rewards/rejected": -1.7873547077178955, "step": 345 }, { "epoch": 2.0833333333333335, "grad_norm": 13.677016258239746, "learning_rate": 5.345224838248489e-06, "log_odds_chosen": 4.409786224365234, "log_odds_ratio": -0.03061295673251152, "logits/chosen": 372.30560302734375, "logits/rejected": 401.3008728027344, "logps/chosen": -0.46024495363235474, "logps/rejected": -3.737445116043091, "loss": 0.5993, "nll_loss": 0.6028808355331421, "rewards/accuracies": 1.0, "rewards/chosen": -0.23012247681617737, "rewards/margins": 1.6386003494262695, "rewards/rejected": -1.8687225580215454, "step": 350 }, { "epoch": 2.113095238095238, "grad_norm": 10.955292701721191, "learning_rate": 5.307448924342753e-06, "log_odds_chosen": 4.361363410949707, "log_odds_ratio": -0.04152694344520569, "logits/chosen": 373.2965393066406, "logits/rejected": 396.0318603515625, "logps/chosen": -0.4621841311454773, "logps/rejected": -3.721182346343994, "loss": 0.5932, "nll_loss": 0.5964406132698059, "rewards/accuracies": 1.0, "rewards/chosen": -0.23109206557273865, "rewards/margins": 1.6294991970062256, "rewards/rejected": -1.860591173171997, "step": 355 }, { "epoch": 2.142857142857143, "grad_norm": 28.92525863647461, "learning_rate": 5.270462766947299e-06, "log_odds_chosen": 4.147698402404785, "log_odds_ratio": -0.032929155975580215, "logits/chosen": 380.5809631347656, "logits/rejected": 416.3023986816406, "logps/chosen": -0.381146639585495, "logps/rejected": -3.1521759033203125, "loss": 0.612, "nll_loss": 0.6254645586013794, "rewards/accuracies": 1.0, "rewards/chosen": -0.1905733197927475, "rewards/margins": 1.3855146169662476, "rewards/rejected": -1.5760879516601562, "step": 360 }, { "epoch": 2.1726190476190474, "grad_norm": 12.443119049072266, "learning_rate": 5.234239225902137e-06, "log_odds_chosen": 4.118858337402344, "log_odds_ratio": -0.03749427944421768, "logits/chosen": 345.683349609375, "logits/rejected": 312.6221923828125, "logps/chosen": -0.32834386825561523, "logps/rejected": -3.117326498031616, "loss": 0.5807, "nll_loss": 0.531657338142395, "rewards/accuracies": 1.0, "rewards/chosen": -0.16417193412780762, "rewards/margins": 1.3944913148880005, "rewards/rejected": -1.558663249015808, "step": 365 }, { "epoch": 2.2023809523809526, "grad_norm": 10.90052318572998, "learning_rate": 5.198752449100364e-06, "log_odds_chosen": 3.4755806922912598, "log_odds_ratio": -0.04619182273745537, "logits/chosen": 332.7835693359375, "logits/rejected": 340.1016845703125, "logps/chosen": -0.4380973279476166, "logps/rejected": -2.8499865531921387, "loss": 0.6158, "nll_loss": 0.6762995719909668, "rewards/accuracies": 1.0, "rewards/chosen": -0.2190486639738083, "rewards/margins": 1.2059446573257446, "rewards/rejected": -1.4249932765960693, "step": 370 }, { "epoch": 2.232142857142857, "grad_norm": 14.661003112792969, "learning_rate": 5.163977794943223e-06, "log_odds_chosen": 3.5086026191711426, "log_odds_ratio": -0.11584819853305817, "logits/chosen": 396.9617614746094, "logits/rejected": 412.2447814941406, "logps/chosen": -0.4914408326148987, "logps/rejected": -2.892894744873047, "loss": 0.6302, "nll_loss": 0.5993391275405884, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24572041630744934, "rewards/margins": 1.2007267475128174, "rewards/rejected": -1.4464473724365234, "step": 375 }, { "epoch": 2.261904761904762, "grad_norm": 13.687707901000977, "learning_rate": 5.129891760425772e-06, "log_odds_chosen": 3.4452381134033203, "log_odds_ratio": -0.04614276438951492, "logits/chosen": 386.617919921875, "logits/rejected": 348.6525573730469, "logps/chosen": -0.43144264817237854, "logps/rejected": -2.679448366165161, "loss": 0.5916, "nll_loss": 0.61055588722229, "rewards/accuracies": 1.0, "rewards/chosen": -0.21572132408618927, "rewards/margins": 1.1240026950836182, "rewards/rejected": -1.3397241830825806, "step": 380 }, { "epoch": 2.2916666666666665, "grad_norm": 12.487543106079102, "learning_rate": 5.096471914376255e-06, "log_odds_chosen": 4.143012046813965, "log_odds_ratio": -0.055028241127729416, "logits/chosen": 369.7757873535156, "logits/rejected": 385.3197326660156, "logps/chosen": -0.42009004950523376, "logps/rejected": -3.1088523864746094, "loss": 0.5661, "nll_loss": 0.5417618155479431, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21004502475261688, "rewards/margins": 1.3443810939788818, "rewards/rejected": -1.5544261932373047, "step": 385 }, { "epoch": 2.3214285714285716, "grad_norm": 14.711711883544922, "learning_rate": 5.0636968354183334e-06, "log_odds_chosen": 3.86775541305542, "log_odds_ratio": -0.051612865179777145, "logits/chosen": 386.55340576171875, "logits/rejected": 420.259033203125, "logps/chosen": -0.40563899278640747, "logps/rejected": -3.13773775100708, "loss": 0.6336, "nll_loss": 0.5235159993171692, "rewards/accuracies": 1.0, "rewards/chosen": -0.20281949639320374, "rewards/margins": 1.3660494089126587, "rewards/rejected": -1.56886887550354, "step": 390 }, { "epoch": 2.3511904761904763, "grad_norm": 14.341813087463379, "learning_rate": 5.031546054266276e-06, "log_odds_chosen": 4.3614888191223145, "log_odds_ratio": -0.02659059502184391, "logits/chosen": 381.09478759765625, "logits/rejected": 371.54730224609375, "logps/chosen": -0.3334965705871582, "logps/rejected": -3.2248425483703613, "loss": 0.6006, "nll_loss": 0.5441581010818481, "rewards/accuracies": 1.0, "rewards/chosen": -0.1667482852935791, "rewards/margins": 1.4456731081008911, "rewards/rejected": -1.6124212741851807, "step": 395 }, { "epoch": 2.380952380952381, "grad_norm": 10.141727447509766, "learning_rate": 5e-06, "log_odds_chosen": 5.030593395233154, "log_odds_ratio": -0.060914844274520874, "logits/chosen": 360.45758056640625, "logits/rejected": 407.6477966308594, "logps/chosen": -0.41616684198379517, "logps/rejected": -4.148199558258057, "loss": 0.5815, "nll_loss": 0.6026356220245361, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20808342099189758, "rewards/margins": 1.8660163879394531, "rewards/rejected": -2.0740997791290283, "step": 400 }, { "epoch": 2.4107142857142856, "grad_norm": 12.312828063964844, "learning_rate": 4.969039949999534e-06, "log_odds_chosen": 3.7259132862091064, "log_odds_ratio": -0.05210161954164505, "logits/chosen": 441.5244140625, "logits/rejected": 379.39215087890625, "logps/chosen": -0.31666994094848633, "logps/rejected": -2.7463951110839844, "loss": 0.6506, "nll_loss": 0.5460222959518433, "rewards/accuracies": 1.0, "rewards/chosen": -0.15833497047424316, "rewards/margins": 1.214862585067749, "rewards/rejected": -1.3731975555419922, "step": 405 }, { "epoch": 2.4404761904761907, "grad_norm": 10.084498405456543, "learning_rate": 4.938647983247949e-06, "log_odds_chosen": 4.13731575012207, "log_odds_ratio": -0.07053720951080322, "logits/chosen": 375.71875, "logits/rejected": 404.51898193359375, "logps/chosen": -0.4222637116909027, "logps/rejected": -3.3887131214141846, "loss": 0.5941, "nll_loss": 0.5796958804130554, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21113185584545135, "rewards/margins": 1.483224630355835, "rewards/rejected": -1.6943565607070923, "step": 410 }, { "epoch": 2.4702380952380953, "grad_norm": 10.309317588806152, "learning_rate": 4.9088069367381605e-06, "log_odds_chosen": 4.0256781578063965, "log_odds_ratio": -0.02946905419230461, "logits/chosen": 410.3990783691406, "logits/rejected": 388.10638427734375, "logps/chosen": -0.34235674142837524, "logps/rejected": -2.9506092071533203, "loss": 0.6024, "nll_loss": 0.5277774333953857, "rewards/accuracies": 1.0, "rewards/chosen": -0.17117837071418762, "rewards/margins": 1.3041261434555054, "rewards/rejected": -1.4753046035766602, "step": 415 }, { "epoch": 2.5, "grad_norm": 17.37739372253418, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 4.524823188781738, "log_odds_ratio": -0.054420001804828644, "logits/chosen": 391.82403564453125, "logits/rejected": 331.8238220214844, "logps/chosen": -0.296555757522583, "logps/rejected": -3.383528470993042, "loss": 0.5781, "nll_loss": 0.6599777340888977, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1482778787612915, "rewards/margins": 1.5434863567352295, "rewards/rejected": -1.691764235496521, "step": 420 }, { "epoch": 2.5297619047619047, "grad_norm": 14.520256042480469, "learning_rate": 4.850712500726659e-06, "log_odds_chosen": 4.5146284103393555, "log_odds_ratio": -0.020355457440018654, "logits/chosen": 364.44293212890625, "logits/rejected": 381.6509094238281, "logps/chosen": -0.4376618266105652, "logps/rejected": -3.6549148559570312, "loss": 0.6064, "nll_loss": 0.7541596293449402, "rewards/accuracies": 1.0, "rewards/chosen": -0.2188309133052826, "rewards/margins": 1.6086266040802002, "rewards/rejected": -1.8274574279785156, "step": 425 }, { "epoch": 2.5595238095238093, "grad_norm": 18.008617401123047, "learning_rate": 4.822428221704122e-06, "log_odds_chosen": 4.2900166511535645, "log_odds_ratio": -0.05251041799783707, "logits/chosen": 418.76812744140625, "logits/rejected": 422.1298828125, "logps/chosen": -0.3535473942756653, "logps/rejected": -3.3458237648010254, "loss": 0.5961, "nll_loss": 0.49905306100845337, "rewards/accuracies": 1.0, "rewards/chosen": -0.17677369713783264, "rewards/margins": 1.496138334274292, "rewards/rejected": -1.6729118824005127, "step": 430 }, { "epoch": 2.5892857142857144, "grad_norm": 14.501811027526855, "learning_rate": 4.794633014853843e-06, "log_odds_chosen": 3.7202048301696777, "log_odds_ratio": -0.08040798455476761, "logits/chosen": 378.04962158203125, "logits/rejected": 374.47149658203125, "logps/chosen": -0.4141341745853424, "logps/rejected": -3.029280424118042, "loss": 0.6443, "nll_loss": 0.5974027514457703, "rewards/accuracies": 1.0, "rewards/chosen": -0.2070670872926712, "rewards/margins": 1.3075730800628662, "rewards/rejected": -1.514640212059021, "step": 435 }, { "epoch": 2.619047619047619, "grad_norm": 13.21033000946045, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 4.036019325256348, "log_odds_ratio": -0.0349675677716732, "logits/chosen": 374.5437927246094, "logits/rejected": 379.2315368652344, "logps/chosen": -0.357850581407547, "logps/rejected": -3.0918896198272705, "loss": 0.6481, "nll_loss": 0.5479155778884888, "rewards/accuracies": 1.0, "rewards/chosen": -0.1789252907037735, "rewards/margins": 1.3670194149017334, "rewards/rejected": -1.5459448099136353, "step": 440 }, { "epoch": 2.6488095238095237, "grad_norm": 10.330572128295898, "learning_rate": 4.740454631399773e-06, "log_odds_chosen": 4.903168678283691, "log_odds_ratio": -0.02322390116751194, "logits/chosen": 356.780029296875, "logits/rejected": 385.78631591796875, "logps/chosen": -0.3364141881465912, "logps/rejected": -3.9139695167541504, "loss": 0.5982, "nll_loss": 0.5599361658096313, "rewards/accuracies": 1.0, "rewards/chosen": -0.1682070940732956, "rewards/margins": 1.7887779474258423, "rewards/rejected": -1.9569847583770752, "step": 445 }, { "epoch": 2.678571428571429, "grad_norm": 11.032569885253906, "learning_rate": 4.714045207910318e-06, "log_odds_chosen": 4.497644901275635, "log_odds_ratio": -0.030209308490157127, "logits/chosen": 394.76336669921875, "logits/rejected": 376.0134582519531, "logps/chosen": -0.41811317205429077, "logps/rejected": -3.552691698074341, "loss": 0.6268, "nll_loss": 0.5461404919624329, "rewards/accuracies": 1.0, "rewards/chosen": -0.20905658602714539, "rewards/margins": 1.567289113998413, "rewards/rejected": -1.7763458490371704, "step": 450 }, { "epoch": 2.7083333333333335, "grad_norm": 17.51761245727539, "learning_rate": 4.688072309384955e-06, "log_odds_chosen": 4.075136661529541, "log_odds_ratio": -0.025673285126686096, "logits/chosen": 384.58154296875, "logits/rejected": 360.6435546875, "logps/chosen": -0.29184406995773315, "logps/rejected": -2.7901484966278076, "loss": 0.6721, "nll_loss": 0.5232545733451843, "rewards/accuracies": 1.0, "rewards/chosen": -0.14592203497886658, "rewards/margins": 1.249152421951294, "rewards/rejected": -1.3950742483139038, "step": 455 }, { "epoch": 2.738095238095238, "grad_norm": 13.356114387512207, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 3.7381584644317627, "log_odds_ratio": -0.041400760412216187, "logits/chosen": 424.15606689453125, "logits/rejected": 436.59417724609375, "logps/chosen": -0.3177054226398468, "logps/rejected": -2.7719643115997314, "loss": 0.6171, "nll_loss": 0.5559448599815369, "rewards/accuracies": 1.0, "rewards/chosen": -0.1588527113199234, "rewards/margins": 1.2271292209625244, "rewards/rejected": -1.3859821557998657, "step": 460 }, { "epoch": 2.767857142857143, "grad_norm": 16.436243057250977, "learning_rate": 4.6373889576016826e-06, "log_odds_chosen": 3.9720139503479004, "log_odds_ratio": -0.04150586202740669, "logits/chosen": 419.3877868652344, "logits/rejected": 399.4032287597656, "logps/chosen": -0.3887273669242859, "logps/rejected": -3.1958889961242676, "loss": 0.6517, "nll_loss": 0.7023509740829468, "rewards/accuracies": 1.0, "rewards/chosen": -0.19436368346214294, "rewards/margins": 1.403580665588379, "rewards/rejected": -1.5979444980621338, "step": 465 }, { "epoch": 2.7976190476190474, "grad_norm": 13.92647647857666, "learning_rate": 4.6126560401444256e-06, "log_odds_chosen": 3.6036312580108643, "log_odds_ratio": -0.0841737613081932, "logits/chosen": 376.7361145019531, "logits/rejected": 346.24578857421875, "logps/chosen": -0.38905754685401917, "logps/rejected": -2.8610401153564453, "loss": 0.5864, "nll_loss": 0.5663691163063049, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19452877342700958, "rewards/margins": 1.235991358757019, "rewards/rejected": -1.4305200576782227, "step": 470 }, { "epoch": 2.8273809523809526, "grad_norm": 22.44719123840332, "learning_rate": 4.588314677411235e-06, "log_odds_chosen": 4.282876014709473, "log_odds_ratio": -0.02305762842297554, "logits/chosen": 351.80535888671875, "logits/rejected": 435.5419006347656, "logps/chosen": -0.376276433467865, "logps/rejected": -3.4008407592773438, "loss": 0.6159, "nll_loss": 0.5037115216255188, "rewards/accuracies": 1.0, "rewards/chosen": -0.1881382167339325, "rewards/margins": 1.512282133102417, "rewards/rejected": -1.7004203796386719, "step": 475 }, { "epoch": 2.857142857142857, "grad_norm": 11.814839363098145, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 5.438357353210449, "log_odds_ratio": -0.03407539427280426, "logits/chosen": 393.2117614746094, "logits/rejected": 400.9731140136719, "logps/chosen": -0.38579824566841125, "logps/rejected": -4.569952964782715, "loss": 0.5976, "nll_loss": 0.6172084212303162, "rewards/accuracies": 1.0, "rewards/chosen": -0.19289912283420563, "rewards/margins": 2.0920770168304443, "rewards/rejected": -2.2849764823913574, "step": 480 }, { "epoch": 2.886904761904762, "grad_norm": 12.605895042419434, "learning_rate": 4.540766091864998e-06, "log_odds_chosen": 3.7765631675720215, "log_odds_ratio": -0.03285397216677666, "logits/chosen": 376.1605529785156, "logits/rejected": 400.92620849609375, "logps/chosen": -0.4142071604728699, "logps/rejected": -3.060633659362793, "loss": 0.5919, "nll_loss": 0.5258246660232544, "rewards/accuracies": 1.0, "rewards/chosen": -0.20710358023643494, "rewards/margins": 1.3232133388519287, "rewards/rejected": -1.5303168296813965, "step": 485 }, { "epoch": 2.9166666666666665, "grad_norm": 10.655911445617676, "learning_rate": 4.517539514526257e-06, "log_odds_chosen": 3.3025474548339844, "log_odds_ratio": -0.07940138876438141, "logits/chosen": 416.90509033203125, "logits/rejected": 319.48687744140625, "logps/chosen": -0.6181944012641907, "logps/rejected": -2.8924221992492676, "loss": 0.6671, "nll_loss": 0.5718962550163269, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.30909720063209534, "rewards/margins": 1.1371139287948608, "rewards/rejected": -1.4462110996246338, "step": 490 }, { "epoch": 2.946428571428571, "grad_norm": 11.349733352661133, "learning_rate": 4.4946657497549474e-06, "log_odds_chosen": 4.556464672088623, "log_odds_ratio": -0.033807143568992615, "logits/chosen": 363.4445495605469, "logits/rejected": 406.9335021972656, "logps/chosen": -0.3999672532081604, "logps/rejected": -3.7019195556640625, "loss": 0.6043, "nll_loss": 0.5209922790527344, "rewards/accuracies": 1.0, "rewards/chosen": -0.1999836266040802, "rewards/margins": 1.6509761810302734, "rewards/rejected": -1.8509597778320312, "step": 495 }, { "epoch": 2.9761904761904763, "grad_norm": 10.738035202026367, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 4.959429740905762, "log_odds_ratio": -0.03518088907003403, "logits/chosen": 417.14154052734375, "logits/rejected": 379.5878601074219, "logps/chosen": -0.2811127007007599, "logps/rejected": -3.7553272247314453, "loss": 0.6266, "nll_loss": 0.42872363328933716, "rewards/accuracies": 1.0, "rewards/chosen": -0.14055635035037994, "rewards/margins": 1.7371070384979248, "rewards/rejected": -1.8776636123657227, "step": 500 }, { "epoch": 3.0, "eval_log_odds_chosen": 0.44554343819618225, "eval_log_odds_ratio": -0.6395100355148315, "eval_logits/chosen": 319.3428649902344, "eval_logits/rejected": 262.5517883300781, "eval_logps/chosen": -1.3908888101577759, "eval_logps/rejected": -1.6692452430725098, "eval_loss": 2.1094672679901123, "eval_nll_loss": 1.7835990190505981, "eval_rewards/accuracies": 0.5571428537368774, "eval_rewards/chosen": -0.6954444050788879, "eval_rewards/margins": 0.13917820155620575, "eval_rewards/rejected": -0.8346226215362549, "eval_runtime": 201.4063, "eval_samples_per_second": 2.746, "eval_steps_per_second": 0.348, "step": 504 }, { "epoch": 3.0, "step": 504, "total_flos": 0.0, "train_loss": 1.593297282854716, "train_runtime": 15168.4679, "train_samples_per_second": 1.061, "train_steps_per_second": 0.033 } ], "logging_steps": 5, "max_steps": 504, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }