|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333334e-08, |
|
"logits/chosen": -2.2416834831237793, |
|
"logits/rejected": -2.1367297172546387, |
|
"logps/chosen": -309.5174865722656, |
|
"logps/rejected": -533.4085693359375, |
|
"loss": 0.2593, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"logits/chosen": -1.8251832723617554, |
|
"logits/rejected": -1.0621190071105957, |
|
"logps/chosen": -543.5095825195312, |
|
"logps/rejected": -825.732177734375, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.00025084687513299286, |
|
"rewards/margins": 0.0002259216271340847, |
|
"rewards/rejected": 2.4925222533056512e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.666666666666667e-07, |
|
"logits/chosen": -1.6684499979019165, |
|
"logits/rejected": -1.289470911026001, |
|
"logps/chosen": -504.35736083984375, |
|
"logps/rejected": -862.5172729492188, |
|
"loss": 0.214, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.00039741364889778197, |
|
"rewards/margins": 0.0008992180228233337, |
|
"rewards/rejected": -0.0005018044030293822, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.448547601699829, |
|
"logits/rejected": -1.2029615640640259, |
|
"logps/chosen": -427.83740234375, |
|
"logps/rejected": -854.1618041992188, |
|
"loss": 0.2082, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0018184988293796778, |
|
"rewards/margins": 0.002095351228490472, |
|
"rewards/rejected": -0.000276852457318455, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.333333333333335e-07, |
|
"logits/chosen": -1.6658061742782593, |
|
"logits/rejected": -1.1971313953399658, |
|
"logps/chosen": -429.181884765625, |
|
"logps/rejected": -865.9625854492188, |
|
"loss": 0.193, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.005454375874251127, |
|
"rewards/margins": 0.008814454078674316, |
|
"rewards/rejected": -0.0033600772731006145, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -1.4225283861160278, |
|
"logits/rejected": -1.2158631086349487, |
|
"logps/chosen": -430.7893981933594, |
|
"logps/rejected": -787.4990234375, |
|
"loss": 0.1899, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.007644618395715952, |
|
"rewards/margins": 0.01243924256414175, |
|
"rewards/rejected": -0.004794624168425798, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.47100830078125, |
|
"logits/rejected": -0.9200455546379089, |
|
"logps/chosen": -466.3695373535156, |
|
"logps/rejected": -906.1533203125, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.008568339981138706, |
|
"rewards/margins": 0.03259299322962761, |
|
"rewards/rejected": -0.024024656042456627, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.333333333333334e-07, |
|
"logits/chosen": -1.579919457435608, |
|
"logits/rejected": -0.9830185174942017, |
|
"logps/chosen": -454.715087890625, |
|
"logps/rejected": -852.3912353515625, |
|
"loss": 0.1825, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.01390122901648283, |
|
"rewards/margins": 0.05136318877339363, |
|
"rewards/rejected": -0.037461958825588226, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.066666666666667e-06, |
|
"logits/chosen": -1.6900907754898071, |
|
"logits/rejected": -1.3661834001541138, |
|
"logps/chosen": -492.48748779296875, |
|
"logps/rejected": -974.8717651367188, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.010218140669167042, |
|
"rewards/margins": 0.07319202274084091, |
|
"rewards/rejected": -0.06297388672828674, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -1.6498056650161743, |
|
"logits/rejected": -1.0119965076446533, |
|
"logps/chosen": -440.96490478515625, |
|
"logps/rejected": -855.2099609375, |
|
"loss": 0.1564, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.006226236931979656, |
|
"rewards/margins": 0.08165968954563141, |
|
"rewards/rejected": -0.07543345540761948, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -1.9902693033218384, |
|
"logits/rejected": -1.1803243160247803, |
|
"logps/chosen": -578.6461791992188, |
|
"logps/rejected": -930.4148559570312, |
|
"loss": 0.1433, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.012515301816165447, |
|
"rewards/margins": 0.10045032203197479, |
|
"rewards/rejected": -0.11296562105417252, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"logits/chosen": -1.6414697170257568, |
|
"logits/rejected": -0.9264998435974121, |
|
"logps/chosen": -572.1052856445312, |
|
"logps/rejected": -975.2772216796875, |
|
"loss": 0.117, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08218317478895187, |
|
"rewards/margins": 0.11511914432048798, |
|
"rewards/rejected": -0.19730232656002045, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.7165985107421875, |
|
"logits/rejected": -1.1280840635299683, |
|
"logps/chosen": -655.0675048828125, |
|
"logps/rejected": -1007.7223510742188, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11872265487909317, |
|
"rewards/margins": 0.10497407615184784, |
|
"rewards/rejected": -0.2236967384815216, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"logits/chosen": -1.721414566040039, |
|
"logits/rejected": -1.0421576499938965, |
|
"logps/chosen": -637.6653442382812, |
|
"logps/rejected": -1049.671630859375, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1268424093723297, |
|
"rewards/margins": 0.13156402111053467, |
|
"rewards/rejected": -0.2584064304828644, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"logits/chosen": -1.8063952922821045, |
|
"logits/rejected": -1.1648313999176025, |
|
"logps/chosen": -533.8194580078125, |
|
"logps/rejected": -1127.229248046875, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06718692928552628, |
|
"rewards/margins": 0.20027296245098114, |
|
"rewards/rejected": -0.2674598693847656, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.8062896728515625, |
|
"logits/rejected": -1.1342815160751343, |
|
"logps/chosen": -531.7640380859375, |
|
"logps/rejected": -1056.5418701171875, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09493765234947205, |
|
"rewards/margins": 0.18374750018119812, |
|
"rewards/rejected": -0.27868515253067017, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.133333333333334e-06, |
|
"logits/chosen": -1.5977694988250732, |
|
"logits/rejected": -1.2945902347564697, |
|
"logps/chosen": -567.9004516601562, |
|
"logps/rejected": -1177.0767822265625, |
|
"loss": 0.0735, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10163917392492294, |
|
"rewards/margins": 0.22462336719036102, |
|
"rewards/rejected": -0.32626253366470337, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.266666666666667e-06, |
|
"logits/chosen": -1.5195014476776123, |
|
"logits/rejected": -0.7815272212028503, |
|
"logps/chosen": -565.3408203125, |
|
"logps/rejected": -1178.865478515625, |
|
"loss": 0.0674, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09116648882627487, |
|
"rewards/margins": 0.2727690637111664, |
|
"rewards/rejected": -0.36393555998802185, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -1.8510783910751343, |
|
"logits/rejected": -1.1616629362106323, |
|
"logps/chosen": -542.5670166015625, |
|
"logps/rejected": -1118.016357421875, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09122536331415176, |
|
"rewards/margins": 0.22348365187644958, |
|
"rewards/rejected": -0.3147090673446655, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"logits/chosen": -1.9452266693115234, |
|
"logits/rejected": -1.231264352798462, |
|
"logps/chosen": -637.9483032226562, |
|
"logps/rejected": -1175.119140625, |
|
"loss": 0.1008, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12329447269439697, |
|
"rewards/margins": 0.2058517038822174, |
|
"rewards/rejected": -0.3291461765766144, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -1.4857038259506226, |
|
"logits/rejected": -0.7614498138427734, |
|
"logps/chosen": -659.7203979492188, |
|
"logps/rejected": -1250.89892578125, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16732044517993927, |
|
"rewards/margins": 0.22920766472816467, |
|
"rewards/rejected": -0.39652806520462036, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -1.6152054071426392, |
|
"logits/rejected": -1.2735153436660767, |
|
"logps/chosen": -557.020751953125, |
|
"logps/rejected": -1197.393798828125, |
|
"loss": 0.0721, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14088398218154907, |
|
"rewards/margins": 0.26476138830184937, |
|
"rewards/rejected": -0.40564537048339844, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"logits/chosen": -1.4580497741699219, |
|
"logits/rejected": -0.8967218399047852, |
|
"logps/chosen": -538.6234130859375, |
|
"logps/rejected": -1041.23291015625, |
|
"loss": 0.1479, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08591003715991974, |
|
"rewards/margins": 0.20331135392189026, |
|
"rewards/rejected": -0.2892213761806488, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.066666666666667e-06, |
|
"logits/chosen": -1.6280606985092163, |
|
"logits/rejected": -1.1948456764221191, |
|
"logps/chosen": -464.53472900390625, |
|
"logps/rejected": -1004.7527465820312, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04852370172739029, |
|
"rewards/margins": 0.19602426886558533, |
|
"rewards/rejected": -0.24454793334007263, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -2.0036234855651855, |
|
"logits/rejected": -1.0634952783584595, |
|
"logps/chosen": -654.42626953125, |
|
"logps/rejected": -1130.1927490234375, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14522799849510193, |
|
"rewards/margins": 0.20732179284095764, |
|
"rewards/rejected": -0.35254979133605957, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.987884759902954, |
|
"logits/rejected": -1.166095495223999, |
|
"logps/chosen": -544.5479736328125, |
|
"logps/rejected": -1122.8720703125, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12993717193603516, |
|
"rewards/margins": 0.21453383564949036, |
|
"rewards/rejected": -0.3444710373878479, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"logits/chosen": -1.532130479812622, |
|
"logits/rejected": -0.9931814074516296, |
|
"logps/chosen": -557.2303466796875, |
|
"logps/rejected": -1242.306396484375, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10561565309762955, |
|
"rewards/margins": 0.23905417323112488, |
|
"rewards/rejected": -0.3446698486804962, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -1.8610786199569702, |
|
"logits/rejected": -1.1762195825576782, |
|
"logps/chosen": -624.46728515625, |
|
"logps/rejected": -1214.777099609375, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10222941637039185, |
|
"rewards/margins": 0.23841162025928497, |
|
"rewards/rejected": -0.3406410217285156, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"logits/chosen": -1.6525980234146118, |
|
"logits/rejected": -1.1402348279953003, |
|
"logps/chosen": -689.7327880859375, |
|
"logps/rejected": -1228.2315673828125, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1671145260334015, |
|
"rewards/margins": 0.22532522678375244, |
|
"rewards/rejected": -0.39243969321250916, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.866666666666667e-06, |
|
"logits/chosen": -1.7951831817626953, |
|
"logits/rejected": -1.0651658773422241, |
|
"logps/chosen": -521.4989013671875, |
|
"logps/rejected": -1288.53466796875, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18049229681491852, |
|
"rewards/margins": 0.28950944542884827, |
|
"rewards/rejected": -0.4700016975402832, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.5762369632720947, |
|
"logits/rejected": -1.2923007011413574, |
|
"logps/chosen": -643.3726806640625, |
|
"logps/rejected": -1266.006591796875, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2561195194721222, |
|
"rewards/margins": 0.247961163520813, |
|
"rewards/rejected": -0.5040806531906128, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.133333333333333e-06, |
|
"logits/chosen": -1.8708035945892334, |
|
"logits/rejected": -0.9332900047302246, |
|
"logps/chosen": -730.3931274414062, |
|
"logps/rejected": -1277.650390625, |
|
"loss": 0.1103, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2379586398601532, |
|
"rewards/margins": 0.25126343965530396, |
|
"rewards/rejected": -0.48922213912010193, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.266666666666668e-06, |
|
"logits/chosen": -1.7914050817489624, |
|
"logits/rejected": -1.1263148784637451, |
|
"logps/chosen": -617.783203125, |
|
"logps/rejected": -1344.7216796875, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.18687450885772705, |
|
"rewards/margins": 0.3094720244407654, |
|
"rewards/rejected": -0.49634653329849243, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -1.8126357793807983, |
|
"logits/rejected": -1.3105145692825317, |
|
"logps/chosen": -567.7543334960938, |
|
"logps/rejected": -1125.550048828125, |
|
"loss": 0.117, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17715778946876526, |
|
"rewards/margins": 0.23828192055225372, |
|
"rewards/rejected": -0.4154396951198578, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.533333333333334e-06, |
|
"logits/chosen": -1.7919952869415283, |
|
"logits/rejected": -1.0027551651000977, |
|
"logps/chosen": -792.9563598632812, |
|
"logps/rejected": -1416.2335205078125, |
|
"loss": 0.0952, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.27687594294548035, |
|
"rewards/margins": 0.27830666303634644, |
|
"rewards/rejected": -0.5551826357841492, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -1.93537175655365, |
|
"logits/rejected": -1.2520567178726196, |
|
"logps/chosen": -673.220947265625, |
|
"logps/rejected": -1093.1484375, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20018038153648376, |
|
"rewards/margins": 0.18651911616325378, |
|
"rewards/rejected": -0.38669952750205994, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.802390456199646, |
|
"logits/rejected": -1.3325846195220947, |
|
"logps/chosen": -649.5408935546875, |
|
"logps/rejected": -1129.62890625, |
|
"loss": 0.1359, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18187561631202698, |
|
"rewards/margins": 0.17299222946166992, |
|
"rewards/rejected": -0.3548678159713745, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.933333333333334e-06, |
|
"logits/chosen": -1.8715474605560303, |
|
"logits/rejected": -1.3402589559555054, |
|
"logps/chosen": -546.8035888671875, |
|
"logps/rejected": -1010.1611328125, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05026025325059891, |
|
"rewards/margins": 0.22379866242408752, |
|
"rewards/rejected": -0.27405890822410583, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999972922944898e-06, |
|
"logits/chosen": -1.9571716785430908, |
|
"logits/rejected": -1.1372935771942139, |
|
"logps/chosen": -733.9254150390625, |
|
"logps/rejected": -1304.736328125, |
|
"loss": 0.0984, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15234608948230743, |
|
"rewards/margins": 0.2319423407316208, |
|
"rewards/rejected": -0.3842884600162506, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -1.851304292678833, |
|
"logits/rejected": -1.3916990756988525, |
|
"logps/chosen": -548.0775146484375, |
|
"logps/rejected": -1033.42578125, |
|
"loss": 0.0848, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11450278759002686, |
|
"rewards/margins": 0.22328679263591766, |
|
"rewards/rejected": -0.3377895951271057, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999323102948655e-06, |
|
"logits/chosen": -2.0076701641082764, |
|
"logits/rejected": -1.2286336421966553, |
|
"logps/chosen": -721.4017333984375, |
|
"logps/rejected": -1361.4373779296875, |
|
"loss": 0.077, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20651724934577942, |
|
"rewards/margins": 0.26063308119773865, |
|
"rewards/rejected": -0.46715036034584045, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998673339256785e-06, |
|
"logits/chosen": -2.0088391304016113, |
|
"logits/rejected": -1.2929044961929321, |
|
"logps/chosen": -666.7005615234375, |
|
"logps/rejected": -1329.128173828125, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21113541722297668, |
|
"rewards/margins": 0.27807140350341797, |
|
"rewards/rejected": -0.48920679092407227, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -2.1807756423950195, |
|
"logits/rejected": -1.3019901514053345, |
|
"logps/chosen": -720.1719360351562, |
|
"logps/rejected": -1275.56982421875, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1774640679359436, |
|
"rewards/margins": 0.26608842611312866, |
|
"rewards/rejected": -0.44355249404907227, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.996724385978142e-06, |
|
"logits/chosen": -2.1555614471435547, |
|
"logits/rejected": -1.3447341918945312, |
|
"logps/chosen": -620.5142822265625, |
|
"logps/rejected": -1299.186279296875, |
|
"loss": 0.0505, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08281116932630539, |
|
"rewards/margins": 0.3319624364376068, |
|
"rewards/rejected": -0.4147736430168152, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995425365260585e-06, |
|
"logits/chosen": -2.018123149871826, |
|
"logits/rejected": -1.2482590675354004, |
|
"logps/chosen": -659.6309814453125, |
|
"logps/rejected": -1221.164794921875, |
|
"loss": 0.0884, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15910136699676514, |
|
"rewards/margins": 0.2621740400791168, |
|
"rewards/rejected": -0.42127543687820435, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.6636863946914673, |
|
"logits/rejected": -0.9911340475082397, |
|
"logps/chosen": -693.9474487304688, |
|
"logps/rejected": -1334.8740234375, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.23787899315357208, |
|
"rewards/margins": 0.24970480799674988, |
|
"rewards/rejected": -0.48758387565612793, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992178798434684e-06, |
|
"logits/chosen": -2.022722005844116, |
|
"logits/rejected": -1.2033276557922363, |
|
"logps/chosen": -636.1743774414062, |
|
"logps/rejected": -1268.32568359375, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19134658575057983, |
|
"rewards/margins": 0.29803937673568726, |
|
"rewards/rejected": -0.4893859326839447, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990231533628719e-06, |
|
"logits/chosen": -1.8332548141479492, |
|
"logits/rejected": -1.0606578588485718, |
|
"logps/chosen": -568.590576171875, |
|
"logps/rejected": -1158.369873046875, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09971268475055695, |
|
"rewards/margins": 0.267979234457016, |
|
"rewards/rejected": -0.36769190430641174, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -2.086530923843384, |
|
"logits/rejected": -1.4134924411773682, |
|
"logps/chosen": -491.255126953125, |
|
"logps/rejected": -967.7403564453125, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09277771413326263, |
|
"rewards/margins": 0.21773691475391388, |
|
"rewards/rejected": -0.3105146288871765, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985689884830711e-06, |
|
"logits/chosen": -1.7672725915908813, |
|
"logits/rejected": -1.2025644779205322, |
|
"logps/chosen": -747.850341796875, |
|
"logps/rejected": -1293.3529052734375, |
|
"loss": 0.1002, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22476276755332947, |
|
"rewards/margins": 0.23183217644691467, |
|
"rewards/rejected": -0.45659494400024414, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -1.6667953729629517, |
|
"logits/rejected": -1.2720074653625488, |
|
"logps/chosen": -679.2232666015625, |
|
"logps/rejected": -1342.873291015625, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20331433415412903, |
|
"rewards/margins": 0.3078593611717224, |
|
"rewards/rejected": -0.511173665523529, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -1.801032304763794, |
|
"logits/rejected": -1.2592867612838745, |
|
"logps/chosen": -528.0743408203125, |
|
"logps/rejected": -1059.638427734375, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10437086969614029, |
|
"rewards/margins": 0.2568288743495941, |
|
"rewards/rejected": -0.3611997663974762, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97726270502586e-06, |
|
"logits/chosen": -1.8762857913970947, |
|
"logits/rejected": -1.14815354347229, |
|
"logps/chosen": -567.5826416015625, |
|
"logps/rejected": -1179.177490234375, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09893319755792618, |
|
"rewards/margins": 0.27982866764068604, |
|
"rewards/rejected": -0.3787618577480316, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974024011595864e-06, |
|
"logits/chosen": -1.9012178182601929, |
|
"logits/rejected": -1.498219609260559, |
|
"logps/chosen": -575.654541015625, |
|
"logps/rejected": -1156.733642578125, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1486634910106659, |
|
"rewards/margins": 0.2521246075630188, |
|
"rewards/rejected": -0.40078815817832947, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -1.5046392679214478, |
|
"logits/rejected": -1.2080037593841553, |
|
"logps/chosen": -609.6295166015625, |
|
"logps/rejected": -1234.922607421875, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19593383371829987, |
|
"rewards/margins": 0.2632136940956116, |
|
"rewards/rejected": -0.45914751291275024, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966903830281449e-06, |
|
"logits/chosen": -1.8756754398345947, |
|
"logits/rejected": -1.3740646839141846, |
|
"logps/chosen": -685.7990112304688, |
|
"logps/rejected": -1199.182373046875, |
|
"loss": 0.1186, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20507201552391052, |
|
"rewards/margins": 0.22510738670825958, |
|
"rewards/rejected": -0.4301794171333313, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9630229593330226e-06, |
|
"logits/chosen": -2.1258938312530518, |
|
"logits/rejected": -1.3714039325714111, |
|
"logps/chosen": -674.0800170898438, |
|
"logps/rejected": -1057.264892578125, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14157623052597046, |
|
"rewards/margins": 0.20917348563671112, |
|
"rewards/rejected": -0.35074976086616516, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.9999202489852905, |
|
"logits/rejected": -1.2103183269500732, |
|
"logps/chosen": -599.47607421875, |
|
"logps/rejected": -1122.523681640625, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1143900528550148, |
|
"rewards/margins": 0.2687007486820221, |
|
"rewards/rejected": -0.3830908238887787, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.954621338136399e-06, |
|
"logits/chosen": -1.8329057693481445, |
|
"logits/rejected": -1.1794686317443848, |
|
"logps/chosen": -716.9681396484375, |
|
"logps/rejected": -1392.547607421875, |
|
"loss": 0.1106, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2248903512954712, |
|
"rewards/margins": 0.31769150495529175, |
|
"rewards/rejected": -0.5425819158554077, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.95010131585597e-06, |
|
"logits/chosen": -1.878209114074707, |
|
"logits/rejected": -1.0662002563476562, |
|
"logps/chosen": -822.8294677734375, |
|
"logps/rejected": -1354.05810546875, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.24139878153800964, |
|
"rewards/margins": 0.24748913943767548, |
|
"rewards/rejected": -0.48888787627220154, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.870165228843689, |
|
"logits/rejected": -1.3282101154327393, |
|
"logps/chosen": -545.804931640625, |
|
"logps/rejected": -1157.7216796875, |
|
"loss": 0.0724, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13343581557273865, |
|
"rewards/margins": 0.23632793128490448, |
|
"rewards/rejected": -0.3697637617588043, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.940424806108619e-06, |
|
"logits/chosen": -1.7240148782730103, |
|
"logits/rejected": -1.2849372625350952, |
|
"logps/chosen": -562.0394897460938, |
|
"logps/rejected": -1218.740478515625, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11796098947525024, |
|
"rewards/margins": 0.26410388946533203, |
|
"rewards/rejected": -0.38206490874290466, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935269157073597e-06, |
|
"logits/chosen": -1.7999210357666016, |
|
"logits/rejected": -1.4692919254302979, |
|
"logps/chosen": -535.56396484375, |
|
"logps/rejected": -1111.559814453125, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1319592297077179, |
|
"rewards/margins": 0.24687273800373077, |
|
"rewards/rejected": -0.3788319528102875, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -1.8443183898925781, |
|
"logits/rejected": -1.105686902999878, |
|
"logps/chosen": -678.8682250976562, |
|
"logps/rejected": -1240.9027099609375, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14497962594032288, |
|
"rewards/margins": 0.2707517147064209, |
|
"rewards/rejected": -0.415731281042099, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924325304226745e-06, |
|
"logits/chosen": -1.597224235534668, |
|
"logits/rejected": -1.1997989416122437, |
|
"logps/chosen": -519.6995239257812, |
|
"logps/rejected": -1027.214599609375, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1587638556957245, |
|
"rewards/margins": 0.2005012482404709, |
|
"rewards/rejected": -0.3592650890350342, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.91853804865716e-06, |
|
"logits/chosen": -1.8366111516952515, |
|
"logits/rejected": -1.1595910787582397, |
|
"logps/chosen": -564.30615234375, |
|
"logps/rejected": -1044.134521484375, |
|
"loss": 0.1318, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09679403156042099, |
|
"rewards/margins": 0.2052040547132492, |
|
"rewards/rejected": -0.3019980788230896, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -1.5248780250549316, |
|
"logits/rejected": -0.9694509506225586, |
|
"logps/chosen": -725.3447265625, |
|
"logps/rejected": -1283.6407470703125, |
|
"loss": 0.077, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18108052015304565, |
|
"rewards/margins": 0.2481127232313156, |
|
"rewards/rejected": -0.42919325828552246, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9063353863980565e-06, |
|
"logits/chosen": -1.7408733367919922, |
|
"logits/rejected": -1.133147954940796, |
|
"logps/chosen": -621.7525024414062, |
|
"logps/rejected": -1341.664306640625, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18977642059326172, |
|
"rewards/margins": 0.3114860951900482, |
|
"rewards/rejected": -0.5012625455856323, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.899921037021719e-06, |
|
"logits/chosen": -1.7900583744049072, |
|
"logits/rejected": -1.2211220264434814, |
|
"logps/chosen": -654.8604736328125, |
|
"logps/rejected": -1338.195068359375, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2140086591243744, |
|
"rewards/margins": 0.278323233127594, |
|
"rewards/rejected": -0.4923318922519684, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -1.8528478145599365, |
|
"logits/rejected": -1.1774396896362305, |
|
"logps/chosen": -580.0806884765625, |
|
"logps/rejected": -1167.764892578125, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12002040445804596, |
|
"rewards/margins": 0.2513282895088196, |
|
"rewards/rejected": -0.37134867906570435, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88646908061933e-06, |
|
"logits/chosen": -1.7654540538787842, |
|
"logits/rejected": -1.2601468563079834, |
|
"logps/chosen": -561.857177734375, |
|
"logps/rejected": -1196.891357421875, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06450385600328445, |
|
"rewards/margins": 0.24975624680519104, |
|
"rewards/rejected": -0.3142600357532501, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879432639152935e-06, |
|
"logits/chosen": -1.7489614486694336, |
|
"logits/rejected": -1.418021559715271, |
|
"logps/chosen": -545.885498046875, |
|
"logps/rejected": -1274.466552734375, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11761901527643204, |
|
"rewards/margins": 0.29888078570365906, |
|
"rewards/rejected": -0.4164997935295105, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -1.6508852243423462, |
|
"logits/rejected": -1.3089258670806885, |
|
"logps/chosen": -873.3531494140625, |
|
"logps/rejected": -1446.792724609375, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3387971818447113, |
|
"rewards/margins": 0.23475828766822815, |
|
"rewards/rejected": -0.5735554695129395, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.864741878038218e-06, |
|
"logits/chosen": -1.5085474252700806, |
|
"logits/rejected": -0.964281439781189, |
|
"logps/chosen": -629.9556274414062, |
|
"logps/rejected": -1125.6422119140625, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21656934916973114, |
|
"rewards/margins": 0.25458166003227234, |
|
"rewards/rejected": -0.47115105390548706, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857088831287158e-06, |
|
"logits/chosen": -1.6169427633285522, |
|
"logits/rejected": -1.1799370050430298, |
|
"logps/chosen": -644.1641235351562, |
|
"logps/rejected": -1156.3399658203125, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20531603693962097, |
|
"rewards/margins": 0.21790286898612976, |
|
"rewards/rejected": -0.4232189655303955, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.7949107885360718, |
|
"logits/rejected": -0.9273164868354797, |
|
"logps/chosen": -694.5661010742188, |
|
"logps/rejected": -1367.0455322265625, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15553930401802063, |
|
"rewards/margins": 0.3378245532512665, |
|
"rewards/rejected": -0.4933638572692871, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841170720873723e-06, |
|
"logits/chosen": -1.671508550643921, |
|
"logits/rejected": -1.0756802558898926, |
|
"logps/chosen": -755.0061645507812, |
|
"logps/rejected": -1223.784423828125, |
|
"loss": 0.0906, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.28831586241722107, |
|
"rewards/margins": 0.23300468921661377, |
|
"rewards/rejected": -0.521320641040802, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.832907036453647e-06, |
|
"logits/chosen": -1.6868644952774048, |
|
"logits/rejected": -1.017174482345581, |
|
"logps/chosen": -857.76904296875, |
|
"logps/rejected": -1345.37841796875, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.31581613421440125, |
|
"rewards/margins": 0.24452456831932068, |
|
"rewards/rejected": -0.5603407621383667, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.5865137577056885, |
|
"logits/rejected": -0.9322856068611145, |
|
"logps/chosen": -661.6602783203125, |
|
"logps/rejected": -1249.6767578125, |
|
"loss": 0.0862, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2079945057630539, |
|
"rewards/margins": 0.2843298017978668, |
|
"rewards/rejected": -0.4923242926597595, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.815773989205165e-06, |
|
"logits/chosen": -1.4541298151016235, |
|
"logits/rejected": -1.0230770111083984, |
|
"logps/chosen": -536.982421875, |
|
"logps/rejected": -1293.7628173828125, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1561233103275299, |
|
"rewards/margins": 0.27445393800735474, |
|
"rewards/rejected": -0.43057721853256226, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.806906110888606e-06, |
|
"logits/chosen": -1.5984938144683838, |
|
"logits/rejected": -1.1344892978668213, |
|
"logps/chosen": -560.5667114257812, |
|
"logps/rejected": -1207.4886474609375, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1664171814918518, |
|
"rewards/margins": 0.2653278708457947, |
|
"rewards/rejected": -0.4317450523376465, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.690313696861267, |
|
"logits/rejected": -1.2708024978637695, |
|
"logps/chosen": -635.1510009765625, |
|
"logps/rejected": -1258.609619140625, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19007566571235657, |
|
"rewards/margins": 0.27011603116989136, |
|
"rewards/rejected": -0.4601917266845703, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.788571486639948e-06, |
|
"logits/chosen": -1.736071228981018, |
|
"logits/rejected": -1.4028418064117432, |
|
"logps/chosen": -652.5921630859375, |
|
"logps/rejected": -1381.756591796875, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20194599032402039, |
|
"rewards/margins": 0.2887413501739502, |
|
"rewards/rejected": -0.49068737030029297, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779106329331665e-06, |
|
"logits/chosen": -1.542184829711914, |
|
"logits/rejected": -0.9706169962882996, |
|
"logps/chosen": -619.546875, |
|
"logps/rejected": -1174.707763671875, |
|
"loss": 0.0978, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16596826910972595, |
|
"rewards/margins": 0.2696087956428528, |
|
"rewards/rejected": -0.43557706475257874, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.812591552734375, |
|
"logits/rejected": -1.1759769916534424, |
|
"logps/chosen": -690.1990966796875, |
|
"logps/rejected": -1262.889892578125, |
|
"loss": 0.0952, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21801619231700897, |
|
"rewards/margins": 0.2809485197067261, |
|
"rewards/rejected": -0.49896472692489624, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.759584424871302e-06, |
|
"logits/chosen": -1.5376830101013184, |
|
"logits/rejected": -0.7281585931777954, |
|
"logps/chosen": -729.5716552734375, |
|
"logps/rejected": -1302.743408203125, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.26298269629478455, |
|
"rewards/margins": 0.28789180517196655, |
|
"rewards/rejected": -0.5508745908737183, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.749529369216246e-06, |
|
"logits/chosen": -1.5037004947662354, |
|
"logits/rejected": -0.9839785695075989, |
|
"logps/chosen": -823.6188354492188, |
|
"logps/rejected": -1403.5616455078125, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2846857011318207, |
|
"rewards/margins": 0.2837271988391876, |
|
"rewards/rejected": -0.5684128403663635, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.379028081893921, |
|
"logits/rejected": -1.1316661834716797, |
|
"logps/chosen": -523.5051879882812, |
|
"logps/rejected": -1148.9351806640625, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17531812191009521, |
|
"rewards/margins": 0.21767202019691467, |
|
"rewards/rejected": -0.3929901719093323, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7288354071380415e-06, |
|
"logits/chosen": -1.7908474206924438, |
|
"logits/rejected": -1.2674061059951782, |
|
"logps/chosen": -596.2501831054688, |
|
"logps/rejected": -1295.179931640625, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1408284455537796, |
|
"rewards/margins": 0.2789645195007324, |
|
"rewards/rejected": -0.4197929799556732, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7181982937661485e-06, |
|
"logits/chosen": -1.5081275701522827, |
|
"logits/rejected": -1.0480270385742188, |
|
"logps/chosen": -599.3560791015625, |
|
"logps/rejected": -1368.0018310546875, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17826338112354279, |
|
"rewards/margins": 0.3195931613445282, |
|
"rewards/rejected": -0.497856467962265, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.6069328784942627, |
|
"logits/rejected": -0.8594322204589844, |
|
"logps/chosen": -767.88427734375, |
|
"logps/rejected": -1367.8349609375, |
|
"loss": 0.0836, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22526466846466064, |
|
"rewards/margins": 0.2738983929157257, |
|
"rewards/rejected": -0.49916306138038635, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.696348410599244e-06, |
|
"logits/chosen": -1.690263032913208, |
|
"logits/rejected": -1.122013807296753, |
|
"logps/chosen": -767.4993896484375, |
|
"logps/rejected": -1443.0506591796875, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24120143055915833, |
|
"rewards/margins": 0.29539158940315247, |
|
"rewards/rejected": -0.5365930199623108, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.685137534011549e-06, |
|
"logits/chosen": -1.711660385131836, |
|
"logits/rejected": -1.2863471508026123, |
|
"logps/chosen": -588.4500732421875, |
|
"logps/rejected": -1289.240966796875, |
|
"loss": 0.0833, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14869460463523865, |
|
"rewards/margins": 0.28291845321655273, |
|
"rewards/rejected": -0.4316130578517914, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.8320610523223877, |
|
"logits/rejected": -1.292765736579895, |
|
"logps/chosen": -668.5866088867188, |
|
"logps/rejected": -1182.257080078125, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18477411568164825, |
|
"rewards/margins": 0.22763219475746155, |
|
"rewards/rejected": -0.4124062955379486, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662148767637578e-06, |
|
"logits/chosen": -1.5611059665679932, |
|
"logits/rejected": -1.1849793195724487, |
|
"logps/chosen": -463.08355712890625, |
|
"logps/rejected": -1084.614501953125, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08577910810709, |
|
"rewards/margins": 0.24440805613994598, |
|
"rewards/rejected": -0.3301871418952942, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.650372869738415e-06, |
|
"logits/chosen": -1.511456847190857, |
|
"logits/rejected": -1.0551631450653076, |
|
"logps/chosen": -556.8165283203125, |
|
"logps/rejected": -1228.0035400390625, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1265622079372406, |
|
"rewards/margins": 0.3069656193256378, |
|
"rewards/rejected": -0.4335278570652008, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.5442140102386475, |
|
"logits/rejected": -0.9868549108505249, |
|
"logps/chosen": -613.2291259765625, |
|
"logps/rejected": -1180.44384765625, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14740554988384247, |
|
"rewards/margins": 0.27208468317985535, |
|
"rewards/rejected": -0.419490247964859, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626263146105875e-06, |
|
"logits/chosen": -1.7330728769302368, |
|
"logits/rejected": -1.2073631286621094, |
|
"logps/chosen": -700.1824340820312, |
|
"logps/rejected": -1393.9658203125, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21020905673503876, |
|
"rewards/margins": 0.29664525389671326, |
|
"rewards/rejected": -0.5068542957305908, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613931409386196e-06, |
|
"logits/chosen": -1.6415268182754517, |
|
"logits/rejected": -1.1623541116714478, |
|
"logps/chosen": -577.8115844726562, |
|
"logps/rejected": -1281.671875, |
|
"loss": 0.062, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.13329057395458221, |
|
"rewards/margins": 0.31404128670692444, |
|
"rewards/rejected": -0.44733184576034546, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.5495585203170776, |
|
"logits/rejected": -1.219543695449829, |
|
"logps/chosen": -611.6688842773438, |
|
"logps/rejected": -1210.188720703125, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16439898312091827, |
|
"rewards/margins": 0.22104132175445557, |
|
"rewards/rejected": -0.38544028997421265, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.6590620279312134, |
|
"logits/rejected": -1.1156672239303589, |
|
"logps/chosen": -558.9346923828125, |
|
"logps/rejected": -1250.8326416015625, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11309070885181427, |
|
"rewards/margins": 0.27457505464553833, |
|
"rewards/rejected": -0.3876657485961914, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.575841568909494e-06, |
|
"logits/chosen": -1.72207772731781, |
|
"logits/rejected": -0.9709069132804871, |
|
"logps/chosen": -574.8037719726562, |
|
"logps/rejected": -1254.651123046875, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08526522666215897, |
|
"rewards/margins": 0.31907790899276733, |
|
"rewards/rejected": -0.4043431282043457, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.6100561618804932, |
|
"logits/rejected": -1.0836453437805176, |
|
"logps/chosen": -605.432373046875, |
|
"logps/rejected": -1389.6484375, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13646581768989563, |
|
"rewards/margins": 0.3396046459674835, |
|
"rewards/rejected": -0.47607049345970154, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549547190300622e-06, |
|
"logits/chosen": -1.5626428127288818, |
|
"logits/rejected": -1.1030738353729248, |
|
"logps/chosen": -767.6092529296875, |
|
"logps/rejected": -1497.570068359375, |
|
"loss": 0.0747, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20658251643180847, |
|
"rewards/margins": 0.3094042241573334, |
|
"rewards/rejected": -0.5159868001937866, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536133049620143e-06, |
|
"logits/chosen": -1.67703378200531, |
|
"logits/rejected": -1.1302894353866577, |
|
"logps/chosen": -506.2029724121094, |
|
"logps/rejected": -1050.456298828125, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10386884212493896, |
|
"rewards/margins": 0.24720358848571777, |
|
"rewards/rejected": -0.35107240080833435, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.510943055152893, |
|
"logits/rejected": -1.2439063787460327, |
|
"logps/chosen": -546.3123168945312, |
|
"logps/rejected": -1226.2244873046875, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15128368139266968, |
|
"rewards/margins": 0.2694355547428131, |
|
"rewards/rejected": -0.4207192063331604, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508776676821739e-06, |
|
"logits/chosen": -1.4208545684814453, |
|
"logits/rejected": -0.8101575970649719, |
|
"logps/chosen": -645.05126953125, |
|
"logps/rejected": -1315.7305908203125, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21077755093574524, |
|
"rewards/margins": 0.2958206832408905, |
|
"rewards/rejected": -0.5065982937812805, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.494836815027022e-06, |
|
"logits/chosen": -1.7293838262557983, |
|
"logits/rejected": -1.1680035591125488, |
|
"logps/chosen": -792.54443359375, |
|
"logps/rejected": -1469.5382080078125, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2889425456523895, |
|
"rewards/margins": 0.28303924202919006, |
|
"rewards/rejected": -0.5719817876815796, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.5097590684890747, |
|
"logits/rejected": -0.8481209874153137, |
|
"logps/chosen": -801.5206298828125, |
|
"logps/rejected": -1431.248291015625, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.29523470997810364, |
|
"rewards/margins": 0.2885613441467285, |
|
"rewards/rejected": -0.5837960243225098, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.466439779715696e-06, |
|
"logits/chosen": -1.5163007974624634, |
|
"logits/rejected": -0.9659198522567749, |
|
"logps/chosen": -658.9117431640625, |
|
"logps/rejected": -1309.037841796875, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21787042915821075, |
|
"rewards/margins": 0.2800406515598297, |
|
"rewards/rejected": -0.49791112542152405, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.451985066691649e-06, |
|
"logits/chosen": -1.6100610494613647, |
|
"logits/rejected": -1.1183403730392456, |
|
"logps/chosen": -612.2716064453125, |
|
"logps/rejected": -1260.2725830078125, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21446070075035095, |
|
"rewards/margins": 0.3018341362476349, |
|
"rewards/rejected": -0.5162948369979858, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.4113812446594238, |
|
"logits/rejected": -0.932741641998291, |
|
"logps/chosen": -581.6453247070312, |
|
"logps/rejected": -1137.978515625, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20348218083381653, |
|
"rewards/margins": 0.25568509101867676, |
|
"rewards/rejected": -0.4591673016548157, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.422569512021332e-06, |
|
"logits/chosen": -1.6003286838531494, |
|
"logits/rejected": -0.9033063054084778, |
|
"logps/chosen": -878.2449340820312, |
|
"logps/rejected": -1371.249267578125, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.29766175150871277, |
|
"rewards/margins": 0.27674609422683716, |
|
"rewards/rejected": -0.5744079351425171, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.407611219118363e-06, |
|
"logits/chosen": -1.6345365047454834, |
|
"logits/rejected": -1.1004862785339355, |
|
"logps/chosen": -627.5486450195312, |
|
"logps/rejected": -1306.81640625, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20319166779518127, |
|
"rewards/margins": 0.3195189833641052, |
|
"rewards/rejected": -0.5227106809616089, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.518014669418335, |
|
"logits/rejected": -0.9323236346244812, |
|
"logps/chosen": -658.4776611328125, |
|
"logps/rejected": -1171.27587890625, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.22144591808319092, |
|
"rewards/margins": 0.2691202759742737, |
|
"rewards/rejected": -0.4905661642551422, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.377200082453748e-06, |
|
"logits/chosen": -1.5906239748001099, |
|
"logits/rejected": -0.9715790748596191, |
|
"logps/chosen": -688.2633056640625, |
|
"logps/rejected": -1256.56494140625, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2001594752073288, |
|
"rewards/margins": 0.25535720586776733, |
|
"rewards/rejected": -0.45551663637161255, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.361749873698707e-06, |
|
"logits/chosen": -1.8183555603027344, |
|
"logits/rejected": -1.1526719331741333, |
|
"logps/chosen": -603.915771484375, |
|
"logps/rejected": -1199.3839111328125, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1569216549396515, |
|
"rewards/margins": 0.24373742938041687, |
|
"rewards/rejected": -0.40065908432006836, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -1.5570250749588013, |
|
"logits/rejected": -1.029444932937622, |
|
"logps/chosen": -720.4706420898438, |
|
"logps/rejected": -1287.0992431640625, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22045788168907166, |
|
"rewards/margins": 0.23490801453590393, |
|
"rewards/rejected": -0.4553658962249756, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.330366868729376e-06, |
|
"logits/chosen": -1.524060845375061, |
|
"logits/rejected": -1.06089186668396, |
|
"logps/chosen": -683.9163208007812, |
|
"logps/rejected": -1294.5540771484375, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21835967898368835, |
|
"rewards/margins": 0.2567465901374817, |
|
"rewards/rejected": -0.4751063287258148, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3144367917302964e-06, |
|
"logits/chosen": -1.5951565504074097, |
|
"logits/rejected": -1.2337514162063599, |
|
"logps/chosen": -591.2457885742188, |
|
"logps/rejected": -1147.8511962890625, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21049340069293976, |
|
"rewards/margins": 0.20914044976234436, |
|
"rewards/rejected": -0.41963380575180054, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.3378808498382568, |
|
"logits/rejected": -0.8554127812385559, |
|
"logps/chosen": -704.482177734375, |
|
"logps/rejected": -1244.0657958984375, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2313729077577591, |
|
"rewards/margins": 0.24450743198394775, |
|
"rewards/rejected": -0.47588032484054565, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2821063899795015e-06, |
|
"logits/chosen": -1.5431578159332275, |
|
"logits/rejected": -1.1451054811477661, |
|
"logps/chosen": -551.98486328125, |
|
"logps/rejected": -1086.984130859375, |
|
"loss": 0.1082, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.17247965931892395, |
|
"rewards/margins": 0.2379414588212967, |
|
"rewards/rejected": -0.41042113304138184, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.265708866531238e-06, |
|
"logits/chosen": -1.3170068264007568, |
|
"logits/rejected": -1.3953243494033813, |
|
"logps/chosen": -555.5161743164062, |
|
"logps/rejected": -1217.95556640625, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1994590014219284, |
|
"rewards/margins": 0.24560394883155823, |
|
"rewards/rejected": -0.4450629651546478, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.5873005390167236, |
|
"logits/rejected": -0.9497630000114441, |
|
"logps/chosen": -762.7379150390625, |
|
"logps/rejected": -1492.514404296875, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.26952821016311646, |
|
"rewards/margins": 0.34387946128845215, |
|
"rewards/rejected": -0.6134077310562134, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.232456278273743e-06, |
|
"logits/chosen": -1.6160411834716797, |
|
"logits/rejected": -0.9002545475959778, |
|
"logps/chosen": -827.9026489257812, |
|
"logps/rejected": -1476.774169921875, |
|
"loss": 0.0731, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2839387059211731, |
|
"rewards/margins": 0.3188818097114563, |
|
"rewards/rejected": -0.6028205156326294, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.7747631072998047, |
|
"logits/rejected": -1.4146007299423218, |
|
"logps/chosen": -544.9703369140625, |
|
"logps/rejected": -1178.328369140625, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.151317298412323, |
|
"rewards/margins": 0.26069843769073486, |
|
"rewards/rejected": -0.41201576590538025, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.6157176494598389, |
|
"logits/rejected": -0.8367627859115601, |
|
"logps/chosen": -626.7408447265625, |
|
"logps/rejected": -1291.121826171875, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1371547281742096, |
|
"rewards/margins": 0.3223341405391693, |
|
"rewards/rejected": -0.45948880910873413, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.181455249275701e-06, |
|
"logits/chosen": -1.7481143474578857, |
|
"logits/rejected": -1.12859308719635, |
|
"logps/chosen": -634.04638671875, |
|
"logps/rejected": -1124.959716796875, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16867594420909882, |
|
"rewards/margins": 0.2416352778673172, |
|
"rewards/rejected": -0.410311222076416, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1641615463459926e-06, |
|
"logits/chosen": -1.5910767316818237, |
|
"logits/rejected": -1.0931599140167236, |
|
"logps/chosen": -631.27392578125, |
|
"logps/rejected": -1231.7950439453125, |
|
"loss": 0.0834, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16265997290611267, |
|
"rewards/margins": 0.24569034576416016, |
|
"rewards/rejected": -0.4083503186702728, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.6471401453018188, |
|
"logits/rejected": -1.2424445152282715, |
|
"logps/chosen": -468.88739013671875, |
|
"logps/rejected": -1020.91259765625, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10470066964626312, |
|
"rewards/margins": 0.23985891044139862, |
|
"rewards/rejected": -0.34455958008766174, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.129143072053639e-06, |
|
"logits/chosen": -1.608716607093811, |
|
"logits/rejected": -0.9606212377548218, |
|
"logps/chosen": -669.7889404296875, |
|
"logps/rejected": -1358.1666259765625, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17117540538311005, |
|
"rewards/margins": 0.31088918447494507, |
|
"rewards/rejected": -0.4820645749568939, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.111421334905468e-06, |
|
"logits/chosen": -1.5793676376342773, |
|
"logits/rejected": -1.0094425678253174, |
|
"logps/chosen": -710.6974487304688, |
|
"logps/rejected": -1337.674560546875, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2793545424938202, |
|
"rewards/margins": 0.28449904918670654, |
|
"rewards/rejected": -0.5638536214828491, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.3306772708892822, |
|
"logits/rejected": -1.1824491024017334, |
|
"logps/chosen": -800.3571166992188, |
|
"logps/rejected": -1588.775146484375, |
|
"loss": 0.088, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.32887041568756104, |
|
"rewards/margins": 0.32484978437423706, |
|
"rewards/rejected": -0.6537202596664429, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.075560538069767e-06, |
|
"logits/chosen": -1.5261657238006592, |
|
"logits/rejected": -1.0766921043395996, |
|
"logps/chosen": -690.2559814453125, |
|
"logps/rejected": -1296.5557861328125, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21848633885383606, |
|
"rewards/margins": 0.2872334122657776, |
|
"rewards/rejected": -0.505719780921936, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05742458558068e-06, |
|
"logits/chosen": -1.416123628616333, |
|
"logits/rejected": -1.1054461002349854, |
|
"logps/chosen": -764.1392822265625, |
|
"logps/rejected": -1332.025634765625, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22151347994804382, |
|
"rewards/margins": 0.24781036376953125, |
|
"rewards/rejected": -0.4693238139152527, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.4527040719985962, |
|
"logits/rejected": -0.6857194900512695, |
|
"logps/chosen": -812.2103271484375, |
|
"logps/rejected": -1516.066650390625, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3405809998512268, |
|
"rewards/margins": 0.31844016909599304, |
|
"rewards/rejected": -0.6590211987495422, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020749429372286e-06, |
|
"logits/chosen": -1.446703314781189, |
|
"logits/rejected": -0.9012134671211243, |
|
"logps/chosen": -742.4305419921875, |
|
"logps/rejected": -1499.476806640625, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3392742872238159, |
|
"rewards/margins": 0.3239519000053406, |
|
"rewards/rejected": -0.6632262468338013, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.002213403412492e-06, |
|
"logits/chosen": -1.3960011005401611, |
|
"logits/rejected": -0.9817187190055847, |
|
"logps/chosen": -778.759033203125, |
|
"logps/rejected": -1387.777099609375, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3149294853210449, |
|
"rewards/margins": 0.27625852823257446, |
|
"rewards/rejected": -0.5911880731582642, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -1.5987809896469116, |
|
"logits/rejected": -0.7307044863700867, |
|
"logps/chosen": -858.3521728515625, |
|
"logps/rejected": -1550.783935546875, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2980232834815979, |
|
"rewards/margins": 0.3726673722267151, |
|
"rewards/rejected": -0.670690655708313, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.964752486015001e-06, |
|
"logits/chosen": -1.7145130634307861, |
|
"logits/rejected": -1.0826901197433472, |
|
"logps/chosen": -551.7413330078125, |
|
"logps/rejected": -973.1448364257812, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.17761428654193878, |
|
"rewards/margins": 0.18822605907917023, |
|
"rewards/rejected": -0.365840345621109, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.945830840419966e-06, |
|
"logits/chosen": -1.7292487621307373, |
|
"logits/rejected": -1.03867506980896, |
|
"logps/chosen": -610.347900390625, |
|
"logps/rejected": -1196.255126953125, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19123509526252747, |
|
"rewards/margins": 0.27820879220962524, |
|
"rewards/rejected": -0.4694438874721527, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.7660369873046875, |
|
"logits/rejected": -0.8002158403396606, |
|
"logps/chosen": -790.7001342773438, |
|
"logps/rejected": -1427.8797607421875, |
|
"loss": 0.0402, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.24115972220897675, |
|
"rewards/margins": 0.34107840061187744, |
|
"rewards/rejected": -0.5822380781173706, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.907613372729916e-06, |
|
"logits/chosen": -1.5305942296981812, |
|
"logits/rejected": -0.9681123495101929, |
|
"logps/chosen": -683.839599609375, |
|
"logps/rejected": -1229.334228515625, |
|
"loss": 0.1139, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.26777878403663635, |
|
"rewards/margins": 0.2556554675102234, |
|
"rewards/rejected": -0.5234342813491821, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.888320862029699e-06, |
|
"logits/chosen": -1.5409247875213623, |
|
"logits/rejected": -1.1824085712432861, |
|
"logps/chosen": -565.3055419921875, |
|
"logps/rejected": -1225.6197509765625, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21753540635108948, |
|
"rewards/margins": 0.262325644493103, |
|
"rewards/rejected": -0.4798610210418701, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.6002075672149658, |
|
"logits/rejected": -0.9055458903312683, |
|
"logps/chosen": -626.1796264648438, |
|
"logps/rejected": -1374.696044921875, |
|
"loss": 0.045, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20963218808174133, |
|
"rewards/margins": 0.36331892013549805, |
|
"rewards/rejected": -0.5729510188102722, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.849376644878783e-06, |
|
"logits/chosen": -1.5136663913726807, |
|
"logits/rejected": -1.2368497848510742, |
|
"logps/chosen": -744.4407958984375, |
|
"logps/rejected": -1368.80126953125, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24185235798358917, |
|
"rewards/margins": 0.2773440182209015, |
|
"rewards/rejected": -0.5191963911056519, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.829728312792895e-06, |
|
"logits/chosen": -1.6058063507080078, |
|
"logits/rejected": -1.3173209428787231, |
|
"logps/chosen": -614.1032104492188, |
|
"logps/rejected": -1252.304443359375, |
|
"loss": 0.0718, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1905815303325653, |
|
"rewards/margins": 0.2669087052345276, |
|
"rewards/rejected": -0.45749014616012573, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.5442336797714233, |
|
"logits/rejected": -1.108355164527893, |
|
"logps/chosen": -711.9778442382812, |
|
"logps/rejected": -1236.2525634765625, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17289568483829498, |
|
"rewards/margins": 0.2372523844242096, |
|
"rewards/rejected": -0.41014808416366577, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.790087713710179e-06, |
|
"logits/chosen": -1.7366338968276978, |
|
"logits/rejected": -1.3570505380630493, |
|
"logps/chosen": -617.22607421875, |
|
"logps/rejected": -1209.1883544921875, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14606155455112457, |
|
"rewards/margins": 0.2611353397369385, |
|
"rewards/rejected": -0.40719684958457947, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.770098881416945e-06, |
|
"logits/chosen": -1.6808090209960938, |
|
"logits/rejected": -0.7971758842468262, |
|
"logps/chosen": -862.0647583007812, |
|
"logps/rejected": -1464.6036376953125, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.23483994603157043, |
|
"rewards/margins": 0.32541361451148987, |
|
"rewards/rejected": -0.5602535009384155, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.6327816247940063, |
|
"logits/rejected": -0.9372482299804688, |
|
"logps/chosen": -734.8026123046875, |
|
"logps/rejected": -1427.211181640625, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2201705276966095, |
|
"rewards/margins": 0.31630367040634155, |
|
"rewards/rejected": -0.5364742279052734, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7297928109491765e-06, |
|
"logits/chosen": -1.4493783712387085, |
|
"logits/rejected": -1.1883541345596313, |
|
"logps/chosen": -693.4669189453125, |
|
"logps/rejected": -1344.7279052734375, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2357136309146881, |
|
"rewards/margins": 0.28132572770118713, |
|
"rewards/rejected": -0.5170393586158752, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7094790651387414e-06, |
|
"logits/chosen": -1.6289390325546265, |
|
"logits/rejected": -1.1786553859710693, |
|
"logps/chosen": -669.4554443359375, |
|
"logps/rejected": -1307.296875, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2002047747373581, |
|
"rewards/margins": 0.3151922821998596, |
|
"rewards/rejected": -0.5153970122337341, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.5846166610717773, |
|
"logits/rejected": -0.9855213165283203, |
|
"logps/chosen": -595.7760620117188, |
|
"logps/rejected": -1270.208740234375, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1446508765220642, |
|
"rewards/margins": 0.3013971149921417, |
|
"rewards/rejected": -0.4460480213165283, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668538952747236e-06, |
|
"logits/chosen": -1.6458574533462524, |
|
"logits/rejected": -1.1102992296218872, |
|
"logps/chosen": -588.1439208984375, |
|
"logps/rejected": -1164.717529296875, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1480761170387268, |
|
"rewards/margins": 0.22489504516124725, |
|
"rewards/rejected": -0.37297114729881287, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6479161334675294e-06, |
|
"logits/chosen": -1.7304718494415283, |
|
"logits/rejected": -1.0173327922821045, |
|
"logps/chosen": -598.8756713867188, |
|
"logps/rejected": -1213.0777587890625, |
|
"loss": 0.064, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13348576426506042, |
|
"rewards/margins": 0.30749550461769104, |
|
"rewards/rejected": -0.44098129868507385, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.7607488632202148, |
|
"logits/rejected": -1.0212706327438354, |
|
"logps/chosen": -677.742431640625, |
|
"logps/rejected": -1323.628173828125, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1735551357269287, |
|
"rewards/margins": 0.3078446090221405, |
|
"rewards/rejected": -0.4813997745513916, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6063739030204226e-06, |
|
"logits/chosen": -1.6025089025497437, |
|
"logits/rejected": -0.9767768979072571, |
|
"logps/chosen": -597.2946166992188, |
|
"logps/rejected": -1132.9073486328125, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16409102082252502, |
|
"rewards/margins": 0.2722373902797699, |
|
"rewards/rejected": -0.4363284111022949, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5854580913255706e-06, |
|
"logits/chosen": -1.616591215133667, |
|
"logits/rejected": -1.1780548095703125, |
|
"logps/chosen": -675.4401245117188, |
|
"logps/rejected": -1367.3082275390625, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22119805216789246, |
|
"rewards/margins": 0.2958839535713196, |
|
"rewards/rejected": -0.5170820951461792, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -1.5432292222976685, |
|
"logits/rejected": -0.871782660484314, |
|
"logps/chosen": -773.683837890625, |
|
"logps/rejected": -1428.926025390625, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.21216030418872833, |
|
"rewards/margins": 0.35337987542152405, |
|
"rewards/rejected": -0.5655401349067688, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543346136204545e-06, |
|
"logits/chosen": -1.3973724842071533, |
|
"logits/rejected": -1.1439921855926514, |
|
"logps/chosen": -646.6536254882812, |
|
"logps/rejected": -1378.410400390625, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.18542486429214478, |
|
"rewards/margins": 0.3139794170856476, |
|
"rewards/rejected": -0.49940428137779236, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.522153641615345e-06, |
|
"logits/chosen": -1.7498506307601929, |
|
"logits/rejected": -0.853812038898468, |
|
"logps/chosen": -691.226806640625, |
|
"logps/rejected": -1190.421630859375, |
|
"loss": 0.0798, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1312675178050995, |
|
"rewards/margins": 0.2993599772453308, |
|
"rewards/rejected": -0.4306275248527527, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.6955890655517578, |
|
"logits/rejected": -1.0088529586791992, |
|
"logps/chosen": -625.9053955078125, |
|
"logps/rejected": -1268.4305419921875, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15290668606758118, |
|
"rewards/margins": 0.2966083288192749, |
|
"rewards/rejected": -0.44951504468917847, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4795047994562463e-06, |
|
"logits/chosen": -1.787287950515747, |
|
"logits/rejected": -0.991382896900177, |
|
"logps/chosen": -736.6607666015625, |
|
"logps/rejected": -1425.2071533203125, |
|
"loss": 0.041, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2185276746749878, |
|
"rewards/margins": 0.3424827456474304, |
|
"rewards/rejected": -0.561010479927063, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.458052147242494e-06, |
|
"logits/chosen": -1.5612847805023193, |
|
"logits/rejected": -0.9777131080627441, |
|
"logps/chosen": -677.7218627929688, |
|
"logps/rejected": -1298.963623046875, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19684460759162903, |
|
"rewards/margins": 0.29719001054763794, |
|
"rewards/rejected": -0.49403461813926697, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.522048830986023, |
|
"logits/rejected": -0.9393981099128723, |
|
"logps/chosen": -657.0848388671875, |
|
"logps/rejected": -1337.720458984375, |
|
"loss": 0.0442, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20744872093200684, |
|
"rewards/margins": 0.3454239070415497, |
|
"rewards/rejected": -0.5528727173805237, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4148996743295305e-06, |
|
"logits/chosen": -1.6736905574798584, |
|
"logits/rejected": -1.3306677341461182, |
|
"logps/chosen": -594.0120849609375, |
|
"logps/rejected": -1347.886474609375, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16157297790050507, |
|
"rewards/margins": 0.33852246403694153, |
|
"rewards/rejected": -0.5000954270362854, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3932035926241103e-06, |
|
"logits/chosen": -1.5001100301742554, |
|
"logits/rejected": -1.099002718925476, |
|
"logps/chosen": -574.8411254882812, |
|
"logps/rejected": -1262.277099609375, |
|
"loss": 0.071, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17917487025260925, |
|
"rewards/margins": 0.3023154139518738, |
|
"rewards/rejected": -0.4814903140068054, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.5629947185516357, |
|
"logits/rejected": -1.0339363813400269, |
|
"logps/chosen": -695.541748046875, |
|
"logps/rejected": -1296.75830078125, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19980263710021973, |
|
"rewards/margins": 0.25737708806991577, |
|
"rewards/rejected": -0.4571797847747803, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.349581137957604e-06, |
|
"logits/chosen": -1.7300357818603516, |
|
"logits/rejected": -0.9519279599189758, |
|
"logps/chosen": -630.9297485351562, |
|
"logps/rejected": -1248.482421875, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1405705362558365, |
|
"rewards/margins": 0.3395325243473053, |
|
"rewards/rejected": -0.4801030158996582, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3276585447123957e-06, |
|
"logits/chosen": -1.7029545307159424, |
|
"logits/rejected": -0.8141438364982605, |
|
"logps/chosen": -717.7041625976562, |
|
"logps/rejected": -1194.781005859375, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1431819498538971, |
|
"rewards/margins": 0.2582007944583893, |
|
"rewards/rejected": -0.40138277411460876, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.7564830780029297, |
|
"logits/rejected": -1.1706587076187134, |
|
"logps/chosen": -668.6366577148438, |
|
"logps/rejected": -1349.9146728515625, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15937338769435883, |
|
"rewards/margins": 0.3159894347190857, |
|
"rewards/rejected": -0.4753628373146057, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2836001237702993e-06, |
|
"logits/chosen": -1.8231847286224365, |
|
"logits/rejected": -1.20169198513031, |
|
"logps/chosen": -600.72265625, |
|
"logps/rejected": -1295.0966796875, |
|
"loss": 0.0506, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13170823454856873, |
|
"rewards/margins": 0.3157060444355011, |
|
"rewards/rejected": -0.4474143087863922, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2614681135640696e-06, |
|
"logits/chosen": -1.424872636795044, |
|
"logits/rejected": -1.0434539318084717, |
|
"logps/chosen": -765.4076538085938, |
|
"logps/rejected": -1424.029052734375, |
|
"loss": 0.088, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22880463302135468, |
|
"rewards/margins": 0.25481894612312317, |
|
"rewards/rejected": -0.48362359404563904, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.6727033853530884, |
|
"logits/rejected": -0.9850581884384155, |
|
"logps/chosen": -603.7737426757812, |
|
"logps/rejected": -1283.9576416015625, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1411423683166504, |
|
"rewards/margins": 0.2962447702884674, |
|
"rewards/rejected": -0.4373871386051178, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.4920157194137573, |
|
"logits/rejected": -1.0782114267349243, |
|
"logps/chosen": -520.2071533203125, |
|
"logps/rejected": -1167.503662109375, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12038830667734146, |
|
"rewards/margins": 0.26983264088630676, |
|
"rewards/rejected": -0.3902209401130676, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1946839124862873e-06, |
|
"logits/chosen": -1.59346342086792, |
|
"logits/rejected": -0.8561047315597534, |
|
"logps/chosen": -620.8773803710938, |
|
"logps/rejected": -1345.0489501953125, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1517111212015152, |
|
"rewards/margins": 0.29772210121154785, |
|
"rewards/rejected": -0.44943323731422424, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.590381383895874, |
|
"logits/rejected": -1.1091078519821167, |
|
"logps/chosen": -734.8489379882812, |
|
"logps/rejected": -1222.6331787109375, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19709806144237518, |
|
"rewards/margins": 0.24125704169273376, |
|
"rewards/rejected": -0.43835514783859253, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.149856938451094e-06, |
|
"logits/chosen": -1.6151930093765259, |
|
"logits/rejected": -0.9260737299919128, |
|
"logps/chosen": -644.0718383789062, |
|
"logps/rejected": -1129.24267578125, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14687219262123108, |
|
"rewards/margins": 0.2772686779499054, |
|
"rewards/rejected": -0.4241408407688141, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.127358017790132e-06, |
|
"logits/chosen": -1.8206260204315186, |
|
"logits/rejected": -1.036929965019226, |
|
"logps/chosen": -636.3726806640625, |
|
"logps/rejected": -1280.237060546875, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1703665554523468, |
|
"rewards/margins": 0.33507412672042847, |
|
"rewards/rejected": -0.5054406523704529, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.7902650833129883, |
|
"logits/rejected": -1.1972464323043823, |
|
"logps/chosen": -671.567138671875, |
|
"logps/rejected": -1139.710205078125, |
|
"loss": 0.096, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.17933420836925507, |
|
"rewards/margins": 0.23233267664909363, |
|
"rewards/rejected": -0.4116668701171875, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.082199056232015e-06, |
|
"logits/chosen": -1.74044668674469, |
|
"logits/rejected": -1.0097100734710693, |
|
"logps/chosen": -597.4201049804688, |
|
"logps/rejected": -1311.656494140625, |
|
"loss": 0.0724, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1604226678609848, |
|
"rewards/margins": 0.33031123876571655, |
|
"rewards/rejected": -0.4907340109348297, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.059542928183079e-06, |
|
"logits/chosen": -1.7327849864959717, |
|
"logits/rejected": -1.105128526687622, |
|
"logps/chosen": -666.2789306640625, |
|
"logps/rejected": -1354.0238037109375, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1905040591955185, |
|
"rewards/margins": 0.3310181498527527, |
|
"rewards/rejected": -0.5215222239494324, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.4599992036819458, |
|
"logits/rejected": -0.9781301617622375, |
|
"logps/chosen": -612.1408081054688, |
|
"logps/rejected": -1311.3360595703125, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21572145819664001, |
|
"rewards/margins": 0.2840631902217865, |
|
"rewards/rejected": -0.4997846484184265, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0140871927018466e-06, |
|
"logits/chosen": -1.6299057006835938, |
|
"logits/rejected": -1.1118695735931396, |
|
"logps/chosen": -627.4758911132812, |
|
"logps/rejected": -1280.96630859375, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1827511489391327, |
|
"rewards/margins": 0.30515769124031067, |
|
"rewards/rejected": -0.4879087805747986, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9912915238320755e-06, |
|
"logits/chosen": -1.7462234497070312, |
|
"logits/rejected": -1.0023224353790283, |
|
"logps/chosen": -662.7495727539062, |
|
"logps/rejected": -1253.028076171875, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1878022700548172, |
|
"rewards/margins": 0.30388498306274414, |
|
"rewards/rejected": -0.4916872978210449, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.6837208271026611, |
|
"logits/rejected": -1.037295937538147, |
|
"logps/chosen": -592.4058837890625, |
|
"logps/rejected": -1251.274658203125, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.164472758769989, |
|
"rewards/margins": 0.3051846921443939, |
|
"rewards/rejected": -0.4696574807167053, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.945574459442917e-06, |
|
"logits/chosen": -1.7075077295303345, |
|
"logits/rejected": -1.0973665714263916, |
|
"logps/chosen": -623.5327758789062, |
|
"logps/rejected": -1248.406494140625, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14970310032367706, |
|
"rewards/margins": 0.3357471525669098, |
|
"rewards/rejected": -0.48545026779174805, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.922657025129185e-06, |
|
"logits/chosen": -1.659570336341858, |
|
"logits/rejected": -1.0617914199829102, |
|
"logps/chosen": -783.8493041992188, |
|
"logps/rejected": -1338.663818359375, |
|
"loss": 0.1171, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24820463359355927, |
|
"rewards/margins": 0.2960302531719208, |
|
"rewards/rejected": -0.5442348718643188, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.685340166091919, |
|
"logits/rejected": -1.1272087097167969, |
|
"logps/chosen": -648.3568115234375, |
|
"logps/rejected": -1290.0946044921875, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2036975622177124, |
|
"rewards/margins": 0.29076558351516724, |
|
"rewards/rejected": -0.49446314573287964, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.876714280623708e-06, |
|
"logits/chosen": -1.5509741306304932, |
|
"logits/rejected": -1.353191614151001, |
|
"logps/chosen": -530.915771484375, |
|
"logps/rejected": -1135.705322265625, |
|
"loss": 0.1021, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18661265075206757, |
|
"rewards/margins": 0.2390277087688446, |
|
"rewards/rejected": -0.425640344619751, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8536929511919227e-06, |
|
"logits/chosen": -1.5140665769577026, |
|
"logits/rejected": -0.9606078267097473, |
|
"logps/chosen": -781.3890380859375, |
|
"logps/rejected": -1375.6937255859375, |
|
"loss": 0.0884, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2563932538032532, |
|
"rewards/margins": 0.2651718854904175, |
|
"rewards/rejected": -0.5215650796890259, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.6241651773452759, |
|
"logits/rejected": -0.8508566617965698, |
|
"logps/chosen": -628.0801391601562, |
|
"logps/rejected": -1286.9000244140625, |
|
"loss": 0.0711, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14530445635318756, |
|
"rewards/margins": 0.3342815041542053, |
|
"rewards/rejected": -0.4795859754085541, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.807560351340302e-06, |
|
"logits/chosen": -1.8166322708129883, |
|
"logits/rejected": -1.1172343492507935, |
|
"logps/chosen": -561.2113647460938, |
|
"logps/rejected": -1348.037841796875, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.12384496629238129, |
|
"rewards/margins": 0.3702481985092163, |
|
"rewards/rejected": -0.4940931797027588, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7844530781306544e-06, |
|
"logits/chosen": -1.6891119480133057, |
|
"logits/rejected": -1.0480057001113892, |
|
"logps/chosen": -686.9342041015625, |
|
"logps/rejected": -1289.268310546875, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17461568117141724, |
|
"rewards/margins": 0.27256545424461365, |
|
"rewards/rejected": -0.44718116521835327, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.7340322732925415, |
|
"logits/rejected": -1.1662895679473877, |
|
"logps/chosen": -579.2034912109375, |
|
"logps/rejected": -1151.33154296875, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1479378640651703, |
|
"rewards/margins": 0.24325743317604065, |
|
"rewards/rejected": -0.3911953270435333, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.738166595746554e-06, |
|
"logits/chosen": -1.6452268362045288, |
|
"logits/rejected": -1.2167978286743164, |
|
"logps/chosen": -568.6005859375, |
|
"logps/rejected": -1262.2685546875, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15343192219734192, |
|
"rewards/margins": 0.26729413866996765, |
|
"rewards/rejected": -0.42072606086730957, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7149913971156105e-06, |
|
"logits/chosen": -1.818169355392456, |
|
"logits/rejected": -1.1683781147003174, |
|
"logps/chosen": -652.8570556640625, |
|
"logps/rejected": -1269.952392578125, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1778111755847931, |
|
"rewards/margins": 0.29952138662338257, |
|
"rewards/rejected": -0.47733253240585327, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.3900476694107056, |
|
"logits/rejected": -0.9688823819160461, |
|
"logps/chosen": -659.0379028320312, |
|
"logps/rejected": -1403.9931640625, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.21347947418689728, |
|
"rewards/margins": 0.3426669239997864, |
|
"rewards/rejected": -0.5561463832855225, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.668587125005663e-06, |
|
"logits/chosen": -1.7847397327423096, |
|
"logits/rejected": -1.250427007675171, |
|
"logps/chosen": -552.1531982421875, |
|
"logps/rejected": -1304.26123046875, |
|
"loss": 0.0691, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15698403120040894, |
|
"rewards/margins": 0.30882400274276733, |
|
"rewards/rejected": -0.46580806374549866, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.4962866306304932, |
|
"logits/rejected": -1.1884623765945435, |
|
"logps/chosen": -508.5406188964844, |
|
"logps/rejected": -1144.9775390625, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14970794320106506, |
|
"rewards/margins": 0.26972299814224243, |
|
"rewards/rejected": -0.41943103075027466, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.4763376712799072, |
|
"logits/rejected": -1.208345890045166, |
|
"logps/chosen": -659.405029296875, |
|
"logps/rejected": -1311.456298828125, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21015486121177673, |
|
"rewards/margins": 0.261810302734375, |
|
"rewards/rejected": -0.47196516394615173, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5988761950959133e-06, |
|
"logits/chosen": -1.6231094598770142, |
|
"logits/rejected": -1.3810958862304688, |
|
"logps/chosen": -578.0490112304688, |
|
"logps/rejected": -1218.637451171875, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15429143607616425, |
|
"rewards/margins": 0.29225456714630127, |
|
"rewards/rejected": -0.4465459883213043, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.575619398465402e-06, |
|
"logits/chosen": -1.6102511882781982, |
|
"logits/rejected": -0.9045922160148621, |
|
"logps/chosen": -626.107666015625, |
|
"logps/rejected": -1319.302978515625, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14732882380485535, |
|
"rewards/margins": 0.3220018446445465, |
|
"rewards/rejected": -0.46933069825172424, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -1.7386415004730225, |
|
"logits/rejected": -1.0501739978790283, |
|
"logps/chosen": -698.34765625, |
|
"logps/rejected": -1343.9432373046875, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20332041382789612, |
|
"rewards/margins": 0.3136293292045593, |
|
"rewards/rejected": -0.5169497728347778, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5290881645034932e-06, |
|
"logits/chosen": -1.7993240356445312, |
|
"logits/rejected": -0.9968295097351074, |
|
"logps/chosen": -737.5123901367188, |
|
"logps/rejected": -1283.9423828125, |
|
"loss": 0.0839, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2413882464170456, |
|
"rewards/margins": 0.25109177827835083, |
|
"rewards/rejected": -0.4924800395965576, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5058177589223766e-06, |
|
"logits/chosen": -1.6581776142120361, |
|
"logits/rejected": -1.2274023294448853, |
|
"logps/chosen": -621.2349853515625, |
|
"logps/rejected": -1304.399169921875, |
|
"loss": 0.0534, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15700002014636993, |
|
"rewards/margins": 0.3114224672317505, |
|
"rewards/rejected": -0.4684225022792816, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.822373628616333, |
|
"logits/rejected": -0.9462094306945801, |
|
"logps/chosen": -783.3508911132812, |
|
"logps/rejected": -1461.9649658203125, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2161974459886551, |
|
"rewards/margins": 0.3368082642555237, |
|
"rewards/rejected": -0.5530056953430176, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4592774518353858e-06, |
|
"logits/chosen": -1.8345363140106201, |
|
"logits/rejected": -1.388167381286621, |
|
"logps/chosen": -656.7705688476562, |
|
"logps/rejected": -1437.629638671875, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1848996877670288, |
|
"rewards/margins": 0.3406218886375427, |
|
"rewards/rejected": -0.5255215764045715, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.436011582865945e-06, |
|
"logits/chosen": -1.5363919734954834, |
|
"logits/rejected": -1.1726293563842773, |
|
"logps/chosen": -633.4727172851562, |
|
"logps/rejected": -1333.555908203125, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17989441752433777, |
|
"rewards/margins": 0.3212878704071045, |
|
"rewards/rejected": -0.5011822581291199, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.5197073221206665, |
|
"logits/rejected": -0.9411822557449341, |
|
"logps/chosen": -578.3282470703125, |
|
"logps/rejected": -1179.3984375, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18851898610591888, |
|
"rewards/margins": 0.27369505167007446, |
|
"rewards/rejected": -0.46221405267715454, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3894984933853734e-06, |
|
"logits/chosen": -1.599889874458313, |
|
"logits/rejected": -0.9709933996200562, |
|
"logps/chosen": -711.7459716796875, |
|
"logps/rejected": -1445.8603515625, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.21319825947284698, |
|
"rewards/margins": 0.34705930948257446, |
|
"rewards/rejected": -0.5602575540542603, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.366255303052377e-06, |
|
"logits/chosen": -1.5162818431854248, |
|
"logits/rejected": -0.9374685287475586, |
|
"logps/chosen": -663.5987548828125, |
|
"logps/rejected": -1244.1162109375, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20607724785804749, |
|
"rewards/margins": 0.28292593359947205, |
|
"rewards/rejected": -0.48900318145751953, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.9861704111099243, |
|
"logits/rejected": -1.1126511096954346, |
|
"logps/chosen": -685.7115478515625, |
|
"logps/rejected": -1307.9677734375, |
|
"loss": 0.0711, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15337470173835754, |
|
"rewards/margins": 0.3153078854084015, |
|
"rewards/rejected": -0.46868258714675903, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319805700686257e-06, |
|
"logits/chosen": -1.466512680053711, |
|
"logits/rejected": -0.9953567385673523, |
|
"logps/chosen": -656.9564819335938, |
|
"logps/rejected": -1330.86572265625, |
|
"loss": 0.0865, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20789778232574463, |
|
"rewards/margins": 0.27107080817222595, |
|
"rewards/rejected": -0.4789685606956482, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.296603313330355e-06, |
|
"logits/chosen": -1.7993675470352173, |
|
"logits/rejected": -0.795965313911438, |
|
"logps/chosen": -716.7975463867188, |
|
"logps/rejected": -1341.012451171875, |
|
"loss": 0.0413, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.16811136901378632, |
|
"rewards/margins": 0.3270968794822693, |
|
"rewards/rejected": -0.4952082633972168, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.7564256191253662, |
|
"logits/rejected": -1.0065138339996338, |
|
"logps/chosen": -605.0843505859375, |
|
"logps/rejected": -1259.658447265625, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13863322138786316, |
|
"rewards/margins": 0.3194582462310791, |
|
"rewards/rejected": -0.4580914378166199, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.250253418081373e-06, |
|
"logits/chosen": -1.7674392461776733, |
|
"logits/rejected": -1.2567976713180542, |
|
"logps/chosen": -696.3572998046875, |
|
"logps/rejected": -1424.5084228515625, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20887522399425507, |
|
"rewards/margins": 0.35076838731765747, |
|
"rewards/rejected": -0.5596436262130737, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.22710992622628e-06, |
|
"logits/chosen": -1.7430893182754517, |
|
"logits/rejected": -1.2086124420166016, |
|
"logps/chosen": -602.26513671875, |
|
"logps/rejected": -1213.3702392578125, |
|
"loss": 0.0766, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17254558205604553, |
|
"rewards/margins": 0.29195210337638855, |
|
"rewards/rejected": -0.46449774503707886, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.5437920093536377, |
|
"logits/rejected": -1.2289955615997314, |
|
"logps/chosen": -678.7044677734375, |
|
"logps/rejected": -1379.6451416015625, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19320425391197205, |
|
"rewards/margins": 0.32549750804901123, |
|
"rewards/rejected": -0.5187016725540161, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1808958803485134e-06, |
|
"logits/chosen": -1.5962891578674316, |
|
"logits/rejected": -1.3936455249786377, |
|
"logps/chosen": -606.307373046875, |
|
"logps/rejected": -1305.8978271484375, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2137691229581833, |
|
"rewards/margins": 0.2590574324131012, |
|
"rewards/rejected": -0.4728265702724457, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.157829330593008e-06, |
|
"logits/chosen": -1.7511285543441772, |
|
"logits/rejected": -0.974310576915741, |
|
"logps/chosen": -633.572509765625, |
|
"logps/rejected": -1414.367431640625, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18712595105171204, |
|
"rewards/margins": 0.37235841155052185, |
|
"rewards/rejected": -0.5594843626022339, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.644934058189392, |
|
"logits/rejected": -1.11379075050354, |
|
"logps/chosen": -639.0684814453125, |
|
"logps/rejected": -1331.093505859375, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2170993536710739, |
|
"rewards/margins": 0.3065374493598938, |
|
"rewards/rejected": -0.5236367583274841, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1117871704092818e-06, |
|
"logits/chosen": -1.7506024837493896, |
|
"logits/rejected": -0.7958993911743164, |
|
"logps/chosen": -705.9612426757812, |
|
"logps/rejected": -1230.204833984375, |
|
"loss": 0.074, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18289552628993988, |
|
"rewards/margins": 0.2924391031265259, |
|
"rewards/rejected": -0.47533464431762695, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0888155493550027e-06, |
|
"logits/chosen": -1.74947190284729, |
|
"logits/rejected": -1.175241231918335, |
|
"logps/chosen": -695.1935424804688, |
|
"logps/rejected": -1281.6820068359375, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.227014422416687, |
|
"rewards/margins": 0.3012952506542206, |
|
"rewards/rejected": -0.5283096432685852, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.3126758337020874, |
|
"logits/rejected": -0.9601501226425171, |
|
"logps/chosen": -722.9898681640625, |
|
"logps/rejected": -1265.3829345703125, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.26479509472846985, |
|
"rewards/margins": 0.2494693100452423, |
|
"rewards/rejected": -0.5142643451690674, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0429811771568468e-06, |
|
"logits/chosen": -1.9693552255630493, |
|
"logits/rejected": -1.2665364742279053, |
|
"logps/chosen": -614.6461181640625, |
|
"logps/rejected": -1254.004150390625, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18582487106323242, |
|
"rewards/margins": 0.31714367866516113, |
|
"rewards/rejected": -0.5029684901237488, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0201223973828917e-06, |
|
"logits/chosen": -1.4698238372802734, |
|
"logits/rejected": -0.9227995872497559, |
|
"logps/chosen": -671.4839477539062, |
|
"logps/rejected": -1292.746826171875, |
|
"loss": 0.089, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.26453524827957153, |
|
"rewards/margins": 0.270897775888443, |
|
"rewards/rejected": -0.5354331135749817, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.694977045059204, |
|
"logits/rejected": -1.0786057710647583, |
|
"logps/chosen": -716.204833984375, |
|
"logps/rejected": -1146.710205078125, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.23150472342967987, |
|
"rewards/margins": 0.21505391597747803, |
|
"rewards/rejected": -0.4465586245059967, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9745315534350157e-06, |
|
"logits/chosen": -1.5376017093658447, |
|
"logits/rejected": -1.0097434520721436, |
|
"logps/chosen": -613.5394287109375, |
|
"logps/rejected": -1213.8609619140625, |
|
"loss": 0.0891, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1808188557624817, |
|
"rewards/margins": 0.28589263558387756, |
|
"rewards/rejected": -0.46671146154403687, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9518034395302413e-06, |
|
"logits/chosen": -1.6548999547958374, |
|
"logits/rejected": -1.14958655834198, |
|
"logps/chosen": -715.3568115234375, |
|
"logps/rejected": -1187.842529296875, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22143769264221191, |
|
"rewards/margins": 0.2518152594566345, |
|
"rewards/rejected": -0.47325292229652405, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.728780746459961, |
|
"logits/rejected": -1.0694595575332642, |
|
"logps/chosen": -720.1282958984375, |
|
"logps/rejected": -1408.6763916015625, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22856228053569794, |
|
"rewards/margins": 0.35453858971595764, |
|
"rewards/rejected": -0.5831009149551392, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9064916742013515e-06, |
|
"logits/chosen": -1.483896017074585, |
|
"logits/rejected": -0.8495844602584839, |
|
"logps/chosen": -686.5977783203125, |
|
"logps/rejected": -1382.664794921875, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24766767024993896, |
|
"rewards/margins": 0.31919145584106445, |
|
"rewards/rejected": -0.5668591260910034, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.883911948865306e-06, |
|
"logits/chosen": -1.490492582321167, |
|
"logits/rejected": -0.8722866177558899, |
|
"logps/chosen": -710.7138671875, |
|
"logps/rejected": -1393.343994140625, |
|
"loss": 0.1017, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2688906192779541, |
|
"rewards/margins": 0.2782156765460968, |
|
"rewards/rejected": -0.5471062660217285, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.5532639026641846, |
|
"logits/rejected": -1.1383737325668335, |
|
"logps/chosen": -775.1503295898438, |
|
"logps/rejected": -1363.999267578125, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2418668568134308, |
|
"rewards/margins": 0.28089746832847595, |
|
"rewards/rejected": -0.5227643251419067, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8389145949069953e-06, |
|
"logits/chosen": -1.7076427936553955, |
|
"logits/rejected": -1.0624377727508545, |
|
"logps/chosen": -605.94287109375, |
|
"logps/rejected": -1299.9039306640625, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1737835705280304, |
|
"rewards/margins": 0.30216971039772034, |
|
"rewards/rejected": -0.47595319151878357, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.816500865130279e-06, |
|
"logits/chosen": -1.704677939414978, |
|
"logits/rejected": -1.0432106256484985, |
|
"logps/chosen": -584.9574584960938, |
|
"logps/rejected": -1062.80078125, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17347513139247894, |
|
"rewards/margins": 0.24399061501026154, |
|
"rewards/rejected": -0.4174656867980957, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.4847257137298584, |
|
"logits/rejected": -1.036319375038147, |
|
"logps/chosen": -565.1887817382812, |
|
"logps/rejected": -1178.5665283203125, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18391203880310059, |
|
"rewards/margins": 0.2579399347305298, |
|
"rewards/rejected": -0.44185200333595276, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7718530101256115e-06, |
|
"logits/chosen": -1.7321197986602783, |
|
"logits/rejected": -1.0332145690917969, |
|
"logps/chosen": -719.0687866210938, |
|
"logps/rejected": -1425.6676025390625, |
|
"loss": 0.052, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.23235881328582764, |
|
"rewards/margins": 0.32954907417297363, |
|
"rewards/rejected": -0.5619078874588013, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7496227534604859e-06, |
|
"logits/chosen": -1.789006233215332, |
|
"logits/rejected": -1.0403592586517334, |
|
"logps/chosen": -728.0673828125, |
|
"logps/rejected": -1402.5009765625, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19960185885429382, |
|
"rewards/margins": 0.30768799781799316, |
|
"rewards/rejected": -0.5072898864746094, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.9194923639297485, |
|
"logits/rejected": -1.3303556442260742, |
|
"logps/chosen": -637.9463500976562, |
|
"logps/rejected": -1268.9755859375, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1636119931936264, |
|
"rewards/margins": 0.2945595383644104, |
|
"rewards/rejected": -0.458171546459198, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7053592124637557e-06, |
|
"logits/chosen": -1.7786033153533936, |
|
"logits/rejected": -1.103823184967041, |
|
"logps/chosen": -694.8350219726562, |
|
"logps/rejected": -1348.7509765625, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20321345329284668, |
|
"rewards/margins": 0.2868782877922058, |
|
"rewards/rejected": -0.49009180068969727, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6833297633956647e-06, |
|
"logits/chosen": -1.6945937871932983, |
|
"logits/rejected": -1.1909369230270386, |
|
"logps/chosen": -574.365966796875, |
|
"logps/rejected": -1239.800537109375, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1648532599210739, |
|
"rewards/margins": 0.2992625832557678, |
|
"rewards/rejected": -0.46411579847335815, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.5424587726593018, |
|
"logits/rejected": -1.3184611797332764, |
|
"logps/chosen": -526.9766845703125, |
|
"logps/rejected": -1158.5274658203125, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16265107691287994, |
|
"rewards/margins": 0.26259398460388184, |
|
"rewards/rejected": -0.42524510622024536, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6394850517846621e-06, |
|
"logits/chosen": -1.6765334606170654, |
|
"logits/rejected": -0.9357019662857056, |
|
"logps/chosen": -637.3072509765625, |
|
"logps/rejected": -1350.0201416015625, |
|
"loss": 0.0483, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1886545568704605, |
|
"rewards/margins": 0.32397735118865967, |
|
"rewards/rejected": -0.5126319527626038, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6176735882153284e-06, |
|
"logits/chosen": -1.4673728942871094, |
|
"logits/rejected": -0.7847996950149536, |
|
"logps/chosen": -721.7418212890625, |
|
"logps/rejected": -1300.6024169921875, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20094910264015198, |
|
"rewards/margins": 0.28207510709762573, |
|
"rewards/rejected": -0.4830242097377777, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.8030465841293335, |
|
"logits/rejected": -1.1619970798492432, |
|
"logps/chosen": -672.4512329101562, |
|
"logps/rejected": -1321.999267578125, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20753267407417297, |
|
"rewards/margins": 0.27218765020370483, |
|
"rewards/rejected": -0.4797203540802002, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5742818947772875e-06, |
|
"logits/chosen": -1.4533730745315552, |
|
"logits/rejected": -0.9818112254142761, |
|
"logps/chosen": -737.9328002929688, |
|
"logps/rejected": -1423.608154296875, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23550765216350555, |
|
"rewards/margins": 0.3242724537849426, |
|
"rewards/rejected": -0.5597800612449646, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.552705424629898e-06, |
|
"logits/chosen": -1.8896198272705078, |
|
"logits/rejected": -1.0289218425750732, |
|
"logps/chosen": -772.728515625, |
|
"logps/rejected": -1228.538818359375, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2038073092699051, |
|
"rewards/margins": 0.26559290289878845, |
|
"rewards/rejected": -0.4694002568721771, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.698685646057129, |
|
"logits/rejected": -1.106320858001709, |
|
"logps/chosen": -706.8604736328125, |
|
"logps/rejected": -1375.6136474609375, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21833567321300507, |
|
"rewards/margins": 0.31459516286849976, |
|
"rewards/rejected": -0.5329307317733765, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.5073596239089966, |
|
"logits/rejected": -0.9609388113021851, |
|
"logps/chosen": -698.6984252929688, |
|
"logps/rejected": -1362.0128173828125, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20453910529613495, |
|
"rewards/margins": 0.3083449900150299, |
|
"rewards/rejected": -0.5128840208053589, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4884759328590476e-06, |
|
"logits/chosen": -1.6541383266448975, |
|
"logits/rejected": -1.2222260236740112, |
|
"logps/chosen": -686.1484375, |
|
"logps/rejected": -1445.005615234375, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20519807934761047, |
|
"rewards/margins": 0.32793715596199036, |
|
"rewards/rejected": -0.5331352353096008, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.5963326692581177, |
|
"logits/rejected": -1.0982022285461426, |
|
"logps/chosen": -682.7689208984375, |
|
"logps/rejected": -1145.491943359375, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21956536173820496, |
|
"rewards/margins": 0.209082692861557, |
|
"rewards/rejected": -0.42864808440208435, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.446091402744923e-06, |
|
"logits/chosen": -1.4184305667877197, |
|
"logits/rejected": -0.9969242215156555, |
|
"logps/chosen": -631.0663452148438, |
|
"logps/rejected": -1298.8023681640625, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2030848264694214, |
|
"rewards/margins": 0.2959844470024109, |
|
"rewards/rejected": -0.4990692138671875, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4250351971283937e-06, |
|
"logits/chosen": -1.6301358938217163, |
|
"logits/rejected": -1.1058179140090942, |
|
"logps/chosen": -733.7390747070312, |
|
"logps/rejected": -1312.249267578125, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2173445224761963, |
|
"rewards/margins": 0.2981737554073334, |
|
"rewards/rejected": -0.515518307685852, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.737184762954712, |
|
"logits/rejected": -0.8671928644180298, |
|
"logps/chosen": -759.1162719726562, |
|
"logps/rejected": -1388.3258056640625, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2206466943025589, |
|
"rewards/margins": 0.301429808139801, |
|
"rewards/rejected": -0.5220764875411987, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3832040268095589e-06, |
|
"logits/chosen": -1.6039609909057617, |
|
"logits/rejected": -0.8329612612724304, |
|
"logps/chosen": -676.49755859375, |
|
"logps/rejected": -1311.5169677734375, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1940799504518509, |
|
"rewards/margins": 0.3204619288444519, |
|
"rewards/rejected": -0.5145418643951416, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.362432686615316e-06, |
|
"logits/chosen": -1.7901986837387085, |
|
"logits/rejected": -1.0667396783828735, |
|
"logps/chosen": -656.2354736328125, |
|
"logps/rejected": -1316.062744140625, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1804797351360321, |
|
"rewards/margins": 0.30354127287864685, |
|
"rewards/rejected": -0.48402100801467896, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.5734020471572876, |
|
"logits/rejected": -1.0475807189941406, |
|
"logps/chosen": -566.615966796875, |
|
"logps/rejected": -1251.5736083984375, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16305221617221832, |
|
"rewards/margins": 0.3017520308494568, |
|
"rewards/rejected": -0.4648042619228363, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3211874947800747e-06, |
|
"logits/chosen": -1.821344017982483, |
|
"logits/rejected": -1.1782556772232056, |
|
"logps/chosen": -690.4012451171875, |
|
"logps/rejected": -1318.4063720703125, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21744892001152039, |
|
"rewards/margins": 0.29652294516563416, |
|
"rewards/rejected": -0.5139719247817993, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3007172168743854e-06, |
|
"logits/chosen": -1.438684105873108, |
|
"logits/rejected": -0.9294763803482056, |
|
"logps/chosen": -742.8309326171875, |
|
"logps/rejected": -1415.3411865234375, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2644263803958893, |
|
"rewards/margins": 0.3054746091365814, |
|
"rewards/rejected": -0.5699009895324707, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -1.6277799606323242, |
|
"logits/rejected": -1.0877095460891724, |
|
"logps/chosen": -703.9315795898438, |
|
"logps/rejected": -1340.6427001953125, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.23966872692108154, |
|
"rewards/margins": 0.2736600339412689, |
|
"rewards/rejected": -0.5133287310600281, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.260090165282645e-06, |
|
"logits/chosen": -1.6082645654678345, |
|
"logits/rejected": -0.9585866928100586, |
|
"logps/chosen": -733.3088989257812, |
|
"logps/rejected": -1261.9248046875, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23271843791007996, |
|
"rewards/margins": 0.248284250497818, |
|
"rewards/rejected": -0.48100265860557556, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2399369117724582e-06, |
|
"logits/chosen": -1.703330397605896, |
|
"logits/rejected": -0.9837586283683777, |
|
"logps/chosen": -662.8977661132812, |
|
"logps/rejected": -1302.231201171875, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17935553193092346, |
|
"rewards/margins": 0.30029359459877014, |
|
"rewards/rejected": -0.4796491265296936, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.6038057804107666, |
|
"logits/rejected": -1.2413532733917236, |
|
"logps/chosen": -518.2135009765625, |
|
"logps/rejected": -1082.8677978515625, |
|
"loss": 0.1043, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16320912539958954, |
|
"rewards/margins": 0.2511638402938843, |
|
"rewards/rejected": -0.414372980594635, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1999596801769617e-06, |
|
"logits/chosen": -1.6547168493270874, |
|
"logits/rejected": -1.0749003887176514, |
|
"logps/chosen": -674.54541015625, |
|
"logps/rejected": -1409.031982421875, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.18622934818267822, |
|
"rewards/margins": 0.3446107506752014, |
|
"rewards/rejected": -0.5308400988578796, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1801391659631423e-06, |
|
"logits/chosen": -1.5979634523391724, |
|
"logits/rejected": -1.0661733150482178, |
|
"logps/chosen": -460.7833557128906, |
|
"logps/rejected": -1178.478759765625, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1438596248626709, |
|
"rewards/margins": 0.30475491285324097, |
|
"rewards/rejected": -0.4486145079135895, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.8192787170410156, |
|
"logits/rejected": -1.0415807962417603, |
|
"logps/chosen": -610.6654052734375, |
|
"logps/rejected": -1276.691162109375, |
|
"loss": 0.0536, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15597601234912872, |
|
"rewards/margins": 0.3177304267883301, |
|
"rewards/rejected": -0.4737063944339752, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1408429274065418e-06, |
|
"logits/chosen": -1.4305180311203003, |
|
"logits/rejected": -1.164957880973816, |
|
"logps/chosen": -624.7916870117188, |
|
"logps/rejected": -1304.414306640625, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18183110654354095, |
|
"rewards/margins": 0.29839175939559937, |
|
"rewards/rejected": -0.4802228808403015, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1213706079298566e-06, |
|
"logits/chosen": -1.6688525676727295, |
|
"logits/rejected": -1.1245152950286865, |
|
"logps/chosen": -636.1717529296875, |
|
"logps/rejected": -1271.97802734375, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19803071022033691, |
|
"rewards/margins": 0.29812103509902954, |
|
"rewards/rejected": -0.49615174531936646, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.4946445226669312, |
|
"logits/rejected": -0.8901990056037903, |
|
"logps/chosen": -699.8790893554688, |
|
"logps/rejected": -1338.4324951171875, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2173963338136673, |
|
"rewards/margins": 0.31975775957107544, |
|
"rewards/rejected": -0.5371540784835815, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0827860044369226e-06, |
|
"logits/chosen": -1.8231594562530518, |
|
"logits/rejected": -0.8806201815605164, |
|
"logps/chosen": -766.8648071289062, |
|
"logps/rejected": -1401.1787109375, |
|
"loss": 0.05, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21393120288848877, |
|
"rewards/margins": 0.3332614004611969, |
|
"rewards/rejected": -0.5471926331520081, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.06367706362636e-06, |
|
"logits/chosen": -1.6795213222503662, |
|
"logits/rejected": -1.0750644207000732, |
|
"logps/chosen": -631.3731689453125, |
|
"logps/rejected": -1225.7191162109375, |
|
"loss": 0.0755, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18206752836704254, |
|
"rewards/margins": 0.2682720720767975, |
|
"rewards/rejected": -0.45033949613571167, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.4441897869110107, |
|
"logits/rejected": -0.9866140484809875, |
|
"logps/chosen": -579.9114379882812, |
|
"logps/rejected": -1187.9873046875, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16464915871620178, |
|
"rewards/margins": 0.2846793234348297, |
|
"rewards/rejected": -0.4493285119533539, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0258341823102418e-06, |
|
"logits/chosen": -1.6576883792877197, |
|
"logits/rejected": -1.0028798580169678, |
|
"logps/chosen": -635.6063232421875, |
|
"logps/rejected": -1112.1954345703125, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16143657267093658, |
|
"rewards/margins": 0.22869448363780975, |
|
"rewards/rejected": -0.39013105630874634, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.6806875467300415, |
|
"logits/rejected": -0.9319907426834106, |
|
"logps/chosen": -598.1236572265625, |
|
"logps/rejected": -1265.469970703125, |
|
"loss": 0.0684, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14364568889141083, |
|
"rewards/margins": 0.3332282602787018, |
|
"rewards/rejected": -0.4768740236759186, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.6131244897842407, |
|
"logits/rejected": -1.3068736791610718, |
|
"logps/chosen": -547.2989501953125, |
|
"logps/rejected": -1120.500244140625, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1456841677427292, |
|
"rewards/margins": 0.27742549777030945, |
|
"rewards/rejected": -0.42310968041419983, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.700318703442437e-07, |
|
"logits/chosen": -1.7162357568740845, |
|
"logits/rejected": -1.1048786640167236, |
|
"logps/chosen": -623.9015502929688, |
|
"logps/rejected": -1287.5198974609375, |
|
"loss": 0.0766, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15075866878032684, |
|
"rewards/margins": 0.31179898977279663, |
|
"rewards/rejected": -0.4625576436519623, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.516940936268504e-07, |
|
"logits/chosen": -1.7830989360809326, |
|
"logits/rejected": -1.1697231531143188, |
|
"logps/chosen": -662.9132080078125, |
|
"logps/rejected": -1347.837646484375, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21208259463310242, |
|
"rewards/margins": 0.3107925057411194, |
|
"rewards/rejected": -0.5228751301765442, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.7367998361587524, |
|
"logits/rejected": -0.8941909670829773, |
|
"logps/chosen": -727.5262451171875, |
|
"logps/rejected": -1364.0263671875, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1890576183795929, |
|
"rewards/margins": 0.31730249524116516, |
|
"rewards/rejected": -0.5063600540161133, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.154225815032242e-07, |
|
"logits/chosen": -1.7351534366607666, |
|
"logits/rejected": -0.9410954713821411, |
|
"logps/chosen": -572.9583740234375, |
|
"logps/rejected": -1397.9078369140625, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14058896899223328, |
|
"rewards/margins": 0.37250643968582153, |
|
"rewards/rejected": -0.5130953788757324, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.974919888823164e-07, |
|
"logits/chosen": -1.6442314386367798, |
|
"logits/rejected": -1.245091438293457, |
|
"logps/chosen": -625.5496215820312, |
|
"logps/rejected": -1145.61865234375, |
|
"loss": 0.1189, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1761692762374878, |
|
"rewards/margins": 0.21311041712760925, |
|
"rewards/rejected": -0.38927969336509705, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.5787712335586548, |
|
"logits/rejected": -1.1393609046936035, |
|
"logps/chosen": -564.6102905273438, |
|
"logps/rejected": -1168.3687744140625, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15366871654987335, |
|
"rewards/margins": 0.2722090780735016, |
|
"rewards/rejected": -0.42587780952453613, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.620488984679378e-07, |
|
"logits/chosen": -1.747091293334961, |
|
"logits/rejected": -1.1368491649627686, |
|
"logps/chosen": -637.8484497070312, |
|
"logps/rejected": -1327.956298828125, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18866987526416779, |
|
"rewards/margins": 0.32480621337890625, |
|
"rewards/rejected": -0.5134760737419128, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.445394716802754e-07, |
|
"logits/chosen": -1.7131919860839844, |
|
"logits/rejected": -1.1550548076629639, |
|
"logps/chosen": -685.2410888671875, |
|
"logps/rejected": -1205.682861328125, |
|
"loss": 0.097, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20656362175941467, |
|
"rewards/margins": 0.2589021325111389, |
|
"rewards/rejected": -0.4654656946659088, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.637920618057251, |
|
"logits/rejected": -1.3879964351654053, |
|
"logps/chosen": -607.6511840820312, |
|
"logps/rejected": -1199.6259765625, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1954393833875656, |
|
"rewards/margins": 0.25878021121025085, |
|
"rewards/rejected": -0.4542195796966553, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.099524404308948e-07, |
|
"logits/chosen": -1.4263249635696411, |
|
"logits/rejected": -1.002600908279419, |
|
"logps/chosen": -636.8242797851562, |
|
"logps/rejected": -1328.989501953125, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2097894698381424, |
|
"rewards/margins": 0.3159825801849365, |
|
"rewards/rejected": -0.5257720351219177, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.928778328007918e-07, |
|
"logits/chosen": -1.884615182876587, |
|
"logits/rejected": -0.9678564071655273, |
|
"logps/chosen": -658.4131469726562, |
|
"logps/rejected": -1356.146240234375, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20153768360614777, |
|
"rewards/margins": 0.3334410786628723, |
|
"rewards/rejected": -0.5349787473678589, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -1.3170682191848755, |
|
"logits/rejected": -0.7453212141990662, |
|
"logps/chosen": -650.6473999023438, |
|
"logps/rejected": -1283.6512451171875, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20364892482757568, |
|
"rewards/margins": 0.28083834052085876, |
|
"rewards/rejected": -0.48448723554611206, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.591738306429769e-07, |
|
"logits/chosen": -1.6163628101348877, |
|
"logits/rejected": -0.9143250584602356, |
|
"logps/chosen": -624.8411865234375, |
|
"logps/rejected": -1358.2152099609375, |
|
"loss": 0.05, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17616188526153564, |
|
"rewards/margins": 0.3604372441768646, |
|
"rewards/rejected": -0.5365991592407227, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.425473564358457e-07, |
|
"logits/chosen": -1.734678030014038, |
|
"logits/rejected": -1.1536650657653809, |
|
"logps/chosen": -641.8426513671875, |
|
"logps/rejected": -1439.745849609375, |
|
"loss": 0.0529, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19275933504104614, |
|
"rewards/margins": 0.3525656759738922, |
|
"rewards/rejected": -0.5453251004219055, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.6836907863616943, |
|
"logits/rejected": -0.9171245694160461, |
|
"logps/chosen": -621.8990478515625, |
|
"logps/rejected": -1194.991943359375, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15242193639278412, |
|
"rewards/margins": 0.2880454659461975, |
|
"rewards/rejected": -0.4404674172401428, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.097526647366379e-07, |
|
"logits/chosen": -1.5863702297210693, |
|
"logits/rejected": -1.0138695240020752, |
|
"logps/chosen": -717.5474853515625, |
|
"logps/rejected": -1396.6239013671875, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.19577325880527496, |
|
"rewards/margins": 0.3056796193122864, |
|
"rewards/rejected": -0.5014528632164001, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.935872887769299e-07, |
|
"logits/chosen": -1.5059714317321777, |
|
"logits/rejected": -0.9991394281387329, |
|
"logps/chosen": -704.1346435546875, |
|
"logps/rejected": -1267.742919921875, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2067212611436844, |
|
"rewards/margins": 0.26990193128585815, |
|
"rewards/rejected": -0.4766232371330261, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.5541185140609741, |
|
"logits/rejected": -1.0399705171585083, |
|
"logps/chosen": -592.094482421875, |
|
"logps/rejected": -1375.5118408203125, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19767038524150848, |
|
"rewards/margins": 0.3308340013027191, |
|
"rewards/rejected": -0.5285044312477112, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.617274798504286e-07, |
|
"logits/chosen": -1.4966031312942505, |
|
"logits/rejected": -0.8529815673828125, |
|
"logps/chosen": -752.8843994140625, |
|
"logps/rejected": -1323.464599609375, |
|
"loss": 0.0798, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24442438781261444, |
|
"rewards/margins": 0.27123716473579407, |
|
"rewards/rejected": -0.5156615972518921, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.460358074120518e-07, |
|
"logits/chosen": -1.9015638828277588, |
|
"logits/rejected": -1.1131356954574585, |
|
"logps/chosen": -598.8160400390625, |
|
"logps/rejected": -1302.127197265625, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15112538635730743, |
|
"rewards/margins": 0.34708380699157715, |
|
"rewards/rejected": -0.4982091784477234, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.577651858329773, |
|
"logits/rejected": -1.0997803211212158, |
|
"logps/chosen": -706.3680419921875, |
|
"logps/rejected": -1251.1690673828125, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22735171020030975, |
|
"rewards/margins": 0.2633035480976105, |
|
"rewards/rejected": -0.4906553328037262, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.151357245788917e-07, |
|
"logits/chosen": -1.5803357362747192, |
|
"logits/rejected": -1.0134937763214111, |
|
"logps/chosen": -787.1359252929688, |
|
"logps/rejected": -1268.341064453125, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2360551357269287, |
|
"rewards/margins": 0.2515987157821655, |
|
"rewards/rejected": -0.4876538813114166, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.999299915559956e-07, |
|
"logits/chosen": -1.863478422164917, |
|
"logits/rejected": -1.186255693435669, |
|
"logps/chosen": -670.4616088867188, |
|
"logps/rejected": -1253.962646484375, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18092487752437592, |
|
"rewards/margins": 0.2903370261192322, |
|
"rewards/rejected": -0.4712619185447693, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.4436417818069458, |
|
"logits/rejected": -1.0620644092559814, |
|
"logps/chosen": -752.1005859375, |
|
"logps/rejected": -1337.766357421875, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.252055823802948, |
|
"rewards/margins": 0.2509405016899109, |
|
"rewards/rejected": -0.5029963254928589, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.700137297712749e-07, |
|
"logits/chosen": -1.4030582904815674, |
|
"logits/rejected": -1.0030864477157593, |
|
"logps/chosen": -657.2237548828125, |
|
"logps/rejected": -1292.746826171875, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.227573424577713, |
|
"rewards/margins": 0.301101952791214, |
|
"rewards/rejected": -0.5286754369735718, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553057931370729e-07, |
|
"logits/chosen": -1.7350914478302002, |
|
"logits/rejected": -1.2158474922180176, |
|
"logps/chosen": -638.9827270507812, |
|
"logps/rejected": -1461.6263427734375, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19272522628307343, |
|
"rewards/margins": 0.3386714458465576, |
|
"rewards/rejected": -0.5313966274261475, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.6435270309448242, |
|
"logits/rejected": -1.0143921375274658, |
|
"logps/chosen": -617.9176635742188, |
|
"logps/rejected": -1159.302734375, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.19033944606781006, |
|
"rewards/margins": 0.2596290409564972, |
|
"rewards/rejected": -0.44996848702430725, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.263966802018275e-07, |
|
"logits/chosen": -1.5586180686950684, |
|
"logits/rejected": -0.9451137781143188, |
|
"logps/chosen": -577.8426513671875, |
|
"logps/rejected": -1369.449951171875, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17990294098854065, |
|
"rewards/margins": 0.33183303475379944, |
|
"rewards/rejected": -0.5117359757423401, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.121980087628802e-07, |
|
"logits/chosen": -1.7278461456298828, |
|
"logits/rejected": -1.0558230876922607, |
|
"logps/chosen": -643.5891723632812, |
|
"logps/rejected": -1279.1890869140625, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19444116950035095, |
|
"rewards/margins": 0.27763885259628296, |
|
"rewards/rejected": -0.4720799922943115, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -1.3910518884658813, |
|
"logits/rejected": -0.9968549609184265, |
|
"logps/chosen": -648.2385864257812, |
|
"logps/rejected": -1314.0189208984375, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20168296992778778, |
|
"rewards/margins": 0.27340278029441833, |
|
"rewards/rejected": -0.4750857949256897, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.843185871337722e-07, |
|
"logits/chosen": -1.7175220251083374, |
|
"logits/rejected": -1.0445207357406616, |
|
"logps/chosen": -652.8839111328125, |
|
"logps/rejected": -1321.618896484375, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16258391737937927, |
|
"rewards/margins": 0.3201899528503418, |
|
"rewards/rejected": -0.48277387022972107, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.706402525869633e-07, |
|
"logits/chosen": -1.6909739971160889, |
|
"logits/rejected": -1.2913880348205566, |
|
"logps/chosen": -679.966796875, |
|
"logps/rejected": -1319.14794921875, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1967785656452179, |
|
"rewards/margins": 0.2842392921447754, |
|
"rewards/rejected": -0.4810178279876709, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.450941801071167, |
|
"logits/rejected": -1.0710570812225342, |
|
"logps/chosen": -729.031494140625, |
|
"logps/rejected": -1396.8394775390625, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23177771270275116, |
|
"rewards/margins": 0.28798457980155945, |
|
"rewards/rejected": -0.5197622776031494, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.438122617983442e-07, |
|
"logits/chosen": -1.2658835649490356, |
|
"logits/rejected": -0.6509383916854858, |
|
"logps/chosen": -678.7276611328125, |
|
"logps/rejected": -1277.664794921875, |
|
"loss": 0.1047, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1934930384159088, |
|
"rewards/margins": 0.27904012799263, |
|
"rewards/rejected": -0.47253313660621643, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3066493009749853e-07, |
|
"logits/chosen": -1.8485597372055054, |
|
"logits/rejected": -1.3042134046554565, |
|
"logps/chosen": -598.8069458007812, |
|
"logps/rejected": -1223.86767578125, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16556212306022644, |
|
"rewards/margins": 0.29780706763267517, |
|
"rewards/rejected": -0.4633691906929016, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -1.780917763710022, |
|
"logits/rejected": -1.0629148483276367, |
|
"logps/chosen": -598.9677734375, |
|
"logps/rejected": -1247.2830810546875, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17943796515464783, |
|
"rewards/margins": 0.29289960861206055, |
|
"rewards/rejected": -0.472337543964386, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.049092898095816e-07, |
|
"logits/chosen": -1.6170413494110107, |
|
"logits/rejected": -1.0848805904388428, |
|
"logps/chosen": -687.8775634765625, |
|
"logps/rejected": -1320.7880859375, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16832710802555084, |
|
"rewards/margins": 0.3181244730949402, |
|
"rewards/rejected": -0.4864516258239746, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9230321284847856e-07, |
|
"logits/chosen": -1.835370659828186, |
|
"logits/rejected": -0.9373771548271179, |
|
"logps/chosen": -725.0065307617188, |
|
"logps/rejected": -1372.621337890625, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.18989510834217072, |
|
"rewards/margins": 0.32323604822158813, |
|
"rewards/rejected": -0.5131311416625977, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.5447750091552734, |
|
"logits/rejected": -0.8559864163398743, |
|
"logps/chosen": -728.7130126953125, |
|
"logps/rejected": -1214.0345458984375, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2228674590587616, |
|
"rewards/margins": 0.24048006534576416, |
|
"rewards/rejected": -0.46334752440452576, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6764000653481263e-07, |
|
"logits/chosen": -1.5830670595169067, |
|
"logits/rejected": -1.0614241361618042, |
|
"logps/chosen": -623.8796997070312, |
|
"logps/rejected": -1382.993408203125, |
|
"loss": 0.0584, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.17642198503017426, |
|
"rewards/margins": 0.3370228409767151, |
|
"rewards/rejected": -0.5134447813034058, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.555850141530659e-07, |
|
"logits/chosen": -1.6500043869018555, |
|
"logits/rejected": -0.9364662170410156, |
|
"logps/chosen": -665.5390014648438, |
|
"logps/rejected": -1273.1171875, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14827661216259003, |
|
"rewards/margins": 0.31309622526168823, |
|
"rewards/rejected": -0.46137291193008423, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -1.5176864862442017, |
|
"logits/rejected": -1.098508596420288, |
|
"logps/chosen": -559.7102661132812, |
|
"logps/rejected": -1274.8511962890625, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16253405809402466, |
|
"rewards/margins": 0.30766361951828003, |
|
"rewards/rejected": -0.4701976180076599, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3203347344004737e-07, |
|
"logits/chosen": -1.6515905857086182, |
|
"logits/rejected": -1.023119568824768, |
|
"logps/chosen": -841.9296875, |
|
"logps/rejected": -1383.3951416015625, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2267489731311798, |
|
"rewards/margins": 0.2902681827545166, |
|
"rewards/rejected": -0.517017126083374, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2053896575809426e-07, |
|
"logits/chosen": -1.6936334371566772, |
|
"logits/rejected": -1.1252939701080322, |
|
"logps/chosen": -680.880859375, |
|
"logps/rejected": -1214.6029052734375, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20804938673973083, |
|
"rewards/margins": 0.2518993020057678, |
|
"rewards/rejected": -0.45994871854782104, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -1.7081215381622314, |
|
"logits/rejected": -0.7063247561454773, |
|
"logps/chosen": -839.2431640625, |
|
"logps/rejected": -1423.2156982421875, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2406737506389618, |
|
"rewards/margins": 0.3139367997646332, |
|
"rewards/rejected": -0.5546106100082397, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.981174554287239e-07, |
|
"logits/chosen": -1.6795532703399658, |
|
"logits/rejected": -1.0844824314117432, |
|
"logps/chosen": -483.9825134277344, |
|
"logps/rejected": -1096.248779296875, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1309995800256729, |
|
"rewards/margins": 0.2878738045692444, |
|
"rewards/rejected": -0.4188733994960785, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.871923955178918e-07, |
|
"logits/chosen": -1.651178002357483, |
|
"logits/rejected": -0.8967401385307312, |
|
"logps/chosen": -714.80810546875, |
|
"logps/rejected": -1329.046630859375, |
|
"loss": 0.0553, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21358323097229004, |
|
"rewards/margins": 0.28250735998153687, |
|
"rewards/rejected": -0.4960905909538269, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -1.6797590255737305, |
|
"logits/rejected": -1.2603545188903809, |
|
"logps/chosen": -557.3433227539062, |
|
"logps/rejected": -1234.2099609375, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14634376764297485, |
|
"rewards/margins": 0.3086283206939697, |
|
"rewards/rejected": -0.4549720883369446, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.477281928062439, |
|
"logits/rejected": -0.9903861880302429, |
|
"logps/chosen": -665.1192626953125, |
|
"logps/rejected": -1360.6173095703125, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20300845801830292, |
|
"rewards/margins": 0.2980322539806366, |
|
"rewards/rejected": -0.5010407567024231, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.555713060848433e-07, |
|
"logits/chosen": -1.2929660081863403, |
|
"logits/rejected": -1.0018800497055054, |
|
"logps/chosen": -685.843017578125, |
|
"logps/rejected": -1353.837158203125, |
|
"loss": 0.0817, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21414248645305634, |
|
"rewards/margins": 0.30294984579086304, |
|
"rewards/rejected": -0.5170923471450806, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.6706600189208984, |
|
"logits/rejected": -1.0511361360549927, |
|
"logps/chosen": -549.2362670898438, |
|
"logps/rejected": -1177.3765869140625, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1762518584728241, |
|
"rewards/margins": 0.275063693523407, |
|
"rewards/rejected": -0.45131558179855347, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3546141258376786e-07, |
|
"logits/chosen": -1.6843605041503906, |
|
"logits/rejected": -1.0689982175827026, |
|
"logps/chosen": -718.2612915039062, |
|
"logps/rejected": -1315.400634765625, |
|
"loss": 0.1008, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20562107861042023, |
|
"rewards/margins": 0.2962108850479126, |
|
"rewards/rejected": -0.5018320083618164, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.257003546333042e-07, |
|
"logits/chosen": -1.6436039209365845, |
|
"logits/rejected": -1.0740540027618408, |
|
"logps/chosen": -530.2700805664062, |
|
"logps/rejected": -1222.441650390625, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17040416598320007, |
|
"rewards/margins": 0.3154822885990143, |
|
"rewards/rejected": -0.48588642477989197, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.7491300106048584, |
|
"logits/rejected": -0.9703881144523621, |
|
"logps/chosen": -634.4661865234375, |
|
"logps/rejected": -1327.0771484375, |
|
"loss": 0.0478, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15670377016067505, |
|
"rewards/margins": 0.3363407254219055, |
|
"rewards/rejected": -0.49304452538490295, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0677024504760752e-07, |
|
"logits/chosen": -1.5196778774261475, |
|
"logits/rejected": -1.040668249130249, |
|
"logps/chosen": -678.7161865234375, |
|
"logps/rejected": -1322.600830078125, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19689127802848816, |
|
"rewards/margins": 0.32300588488578796, |
|
"rewards/rejected": -0.5198971033096313, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9760283363267684e-07, |
|
"logits/chosen": -1.5344351530075073, |
|
"logits/rejected": -1.066190481185913, |
|
"logps/chosen": -577.383056640625, |
|
"logps/rejected": -1262.2083740234375, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14838716387748718, |
|
"rewards/margins": 0.31335335969924927, |
|
"rewards/rejected": -0.4617405831813812, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.5965193510055542, |
|
"logits/rejected": -1.0383847951889038, |
|
"logps/chosen": -608.2281494140625, |
|
"logps/rejected": -1189.1614990234375, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1726662814617157, |
|
"rewards/margins": 0.26928216218948364, |
|
"rewards/rejected": -0.44194841384887695, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.798672690923828e-07, |
|
"logits/chosen": -1.3265117406845093, |
|
"logits/rejected": -0.8752411007881165, |
|
"logps/chosen": -684.5750732421875, |
|
"logps/rejected": -1294.5103759765625, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21347585320472717, |
|
"rewards/margins": 0.2741771936416626, |
|
"rewards/rejected": -0.4876530170440674, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.713006526846439e-07, |
|
"logits/chosen": -1.5635359287261963, |
|
"logits/rejected": -1.1060292720794678, |
|
"logps/chosen": -639.0496826171875, |
|
"logps/rejected": -1266.63720703125, |
|
"loss": 0.0836, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2038009911775589, |
|
"rewards/margins": 0.26461145281791687, |
|
"rewards/rejected": -0.4684123992919922, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.5050264596939087, |
|
"logits/rejected": -1.0238497257232666, |
|
"logps/chosen": -667.7071533203125, |
|
"logps/rejected": -1466.681884765625, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.21334633231163025, |
|
"rewards/margins": 0.3355127274990082, |
|
"rewards/rejected": -0.5488591194152832, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5477346284948292e-07, |
|
"logits/chosen": -1.8201059103012085, |
|
"logits/rejected": -1.0349600315093994, |
|
"logps/chosen": -649.684326171875, |
|
"logps/rejected": -1250.92236328125, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1781463921070099, |
|
"rewards/margins": 0.3080436587333679, |
|
"rewards/rejected": -0.4861900210380554, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4681432143872133e-07, |
|
"logits/chosen": -1.6552870273590088, |
|
"logits/rejected": -1.2580300569534302, |
|
"logps/chosen": -656.0045166015625, |
|
"logps/rejected": -1372.251953125, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19065485894680023, |
|
"rewards/margins": 0.3371616005897522, |
|
"rewards/rejected": -0.5278164744377136, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.679107666015625, |
|
"logits/rejected": -1.0372817516326904, |
|
"logps/chosen": -586.6466674804688, |
|
"logps/rejected": -1267.5791015625, |
|
"loss": 0.0689, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18233391642570496, |
|
"rewards/margins": 0.279081255197525, |
|
"rewards/rejected": -0.4614151418209076, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.31508393714177e-07, |
|
"logits/chosen": -1.4867794513702393, |
|
"logits/rejected": -0.8477977514266968, |
|
"logps/chosen": -554.5364990234375, |
|
"logps/rejected": -1226.228271484375, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16267123818397522, |
|
"rewards/margins": 0.3219824433326721, |
|
"rewards/rejected": -0.4846537113189697, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.241629335994471e-07, |
|
"logits/chosen": -1.8708140850067139, |
|
"logits/rejected": -1.0227950811386108, |
|
"logps/chosen": -799.6752319335938, |
|
"logps/rejected": -1433.6239013671875, |
|
"loss": 0.0827, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2366192787885666, |
|
"rewards/margins": 0.31682151556015015, |
|
"rewards/rejected": -0.5534407496452332, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -1.6258529424667358, |
|
"logits/rejected": -1.1475646495819092, |
|
"logps/chosen": -691.2940063476562, |
|
"logps/rejected": -1324.8748779296875, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20850765705108643, |
|
"rewards/margins": 0.28260093927383423, |
|
"rewards/rejected": -0.49110865592956543, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1009020308754587e-07, |
|
"logits/chosen": -1.414668083190918, |
|
"logits/rejected": -1.0075281858444214, |
|
"logps/chosen": -676.2430419921875, |
|
"logps/rejected": -1345.2977294921875, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.22368064522743225, |
|
"rewards/margins": 0.27526089549064636, |
|
"rewards/rejected": -0.498941570520401, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0336415203768962e-07, |
|
"logits/chosen": -1.6306703090667725, |
|
"logits/rejected": -1.1734403371810913, |
|
"logps/chosen": -728.4344482421875, |
|
"logps/rejected": -1319.7030029296875, |
|
"loss": 0.068, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21984486281871796, |
|
"rewards/margins": 0.26837268471717834, |
|
"rewards/rejected": -0.4882175326347351, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.685302734375, |
|
"logits/rejected": -1.3321112394332886, |
|
"logps/chosen": -745.2166748046875, |
|
"logps/rejected": -1243.215087890625, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1889590322971344, |
|
"rewards/margins": 0.25796008110046387, |
|
"rewards/rejected": -0.44691914319992065, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.053559223036746e-08, |
|
"logits/chosen": -1.6785064935684204, |
|
"logits/rejected": -1.05990731716156, |
|
"logps/chosen": -662.5062255859375, |
|
"logps/rejected": -1327.50390625, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21094217896461487, |
|
"rewards/margins": 0.28824371099472046, |
|
"rewards/rejected": -0.49918586015701294, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.44341950176683e-08, |
|
"logits/chosen": -1.6093642711639404, |
|
"logits/rejected": -0.9099162817001343, |
|
"logps/chosen": -605.3573608398438, |
|
"logps/rejected": -1294.749755859375, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16440364718437195, |
|
"rewards/margins": 0.3216980993747711, |
|
"rewards/rejected": -0.4861017167568207, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.6602537631988525, |
|
"logits/rejected": -1.277896761894226, |
|
"logps/chosen": -698.0342407226562, |
|
"logps/rejected": -1383.7066650390625, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2171173393726349, |
|
"rewards/margins": 0.3016865849494934, |
|
"rewards/rejected": -0.5188038945198059, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.285980923996989e-08, |
|
"logits/chosen": -1.7102686166763306, |
|
"logits/rejected": -1.154813528060913, |
|
"logps/chosen": -705.587158203125, |
|
"logps/rejected": -1409.553955078125, |
|
"loss": 0.088, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23871013522148132, |
|
"rewards/margins": 0.29305535554885864, |
|
"rewards/rejected": -0.5317655205726624, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.7222769260406494, |
|
"logits/rejected": -1.0485639572143555, |
|
"logps/chosen": -677.2478637695312, |
|
"logps/rejected": -1264.8682861328125, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21438893675804138, |
|
"rewards/margins": 0.27774578332901, |
|
"rewards/rejected": -0.4921347498893738, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.6767486333847046, |
|
"logits/rejected": -0.995303750038147, |
|
"logps/chosen": -749.8323974609375, |
|
"logps/rejected": -1487.445068359375, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.21066558361053467, |
|
"rewards/margins": 0.3550676703453064, |
|
"rewards/rejected": -0.5657332539558411, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.707663716023021e-08, |
|
"logits/chosen": -1.4784590005874634, |
|
"logits/rejected": -0.6975718140602112, |
|
"logps/chosen": -666.94189453125, |
|
"logps/rejected": -1402.3831787109375, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.20545247197151184, |
|
"rewards/margins": 0.34110763669013977, |
|
"rewards/rejected": -0.5465600490570068, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.22383298837098e-08, |
|
"logits/chosen": -1.527578353881836, |
|
"logits/rejected": -0.8852685689926147, |
|
"logps/chosen": -677.0675659179688, |
|
"logps/rejected": -1252.460205078125, |
|
"loss": 0.0794, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20508520305156708, |
|
"rewards/margins": 0.2905604839324951, |
|
"rewards/rejected": -0.4956456124782562, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.4756338596343994, |
|
"logits/rejected": -0.9907282590866089, |
|
"logps/chosen": -628.2028198242188, |
|
"logps/rejected": -1525.0224609375, |
|
"loss": 0.0322, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.21087181568145752, |
|
"rewards/margins": 0.3604298532009125, |
|
"rewards/rejected": -0.5713016390800476, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.319838323396691e-08, |
|
"logits/chosen": -1.6325428485870361, |
|
"logits/rejected": -1.2529757022857666, |
|
"logps/chosen": -640.9552612304688, |
|
"logps/rejected": -1428.263916015625, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1934538334608078, |
|
"rewards/margins": 0.32024726271629333, |
|
"rewards/rejected": -0.5137011408805847, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8997527136930004e-08, |
|
"logits/chosen": -1.505150556564331, |
|
"logits/rejected": -0.8225961923599243, |
|
"logps/chosen": -707.0218505859375, |
|
"logps/rejected": -1327.9573974609375, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2203945368528366, |
|
"rewards/margins": 0.2966047525405884, |
|
"rewards/rejected": -0.5169993042945862, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.5440739393234253, |
|
"logits/rejected": -0.8097125887870789, |
|
"logps/chosen": -710.9814453125, |
|
"logps/rejected": -1306.008056640625, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21110442280769348, |
|
"rewards/margins": 0.3028753995895386, |
|
"rewards/rejected": -0.5139797925949097, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1235869306123766e-08, |
|
"logits/chosen": -1.7210094928741455, |
|
"logits/rejected": -1.13853120803833, |
|
"logps/chosen": -723.6201782226562, |
|
"logps/rejected": -1426.6231689453125, |
|
"loss": 0.062, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2129667103290558, |
|
"rewards/margins": 0.31347301602363586, |
|
"rewards/rejected": -0.5264397263526917, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.767574008979007e-08, |
|
"logits/chosen": -1.492082953453064, |
|
"logits/rejected": -0.8750694990158081, |
|
"logps/chosen": -754.5972290039062, |
|
"logps/rejected": -1386.701904296875, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.23436252772808075, |
|
"rewards/margins": 0.2830619215965271, |
|
"rewards/rejected": -0.517424464225769, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.768261194229126, |
|
"logits/rejected": -1.1734797954559326, |
|
"logps/chosen": -634.6853637695312, |
|
"logps/rejected": -1281.8953857421875, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19491219520568848, |
|
"rewards/margins": 0.29776549339294434, |
|
"rewards/rejected": -0.4926777482032776, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1198423385220822e-08, |
|
"logits/chosen": -1.518048882484436, |
|
"logits/rejected": -1.1481419801712036, |
|
"logps/chosen": -578.7421875, |
|
"logps/rejected": -1115.6094970703125, |
|
"loss": 0.1095, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18905951082706451, |
|
"rewards/margins": 0.2177170068025589, |
|
"rewards/rejected": -0.4067765176296234, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.82817971312621e-08, |
|
"logits/chosen": -1.632956862449646, |
|
"logits/rejected": -1.2727057933807373, |
|
"logps/chosen": -638.421875, |
|
"logps/rejected": -1247.95751953125, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20175893604755402, |
|
"rewards/margins": 0.25679153203964233, |
|
"rewards/rejected": -0.45855045318603516, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.6741609573364258, |
|
"logits/rejected": -1.1588261127471924, |
|
"logps/chosen": -625.3444213867188, |
|
"logps/rejected": -1219.2210693359375, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15933595597743988, |
|
"rewards/margins": 0.29343315958976746, |
|
"rewards/rejected": -0.4527691900730133, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3093872369654148e-08, |
|
"logits/chosen": -1.5549055337905884, |
|
"logits/rejected": -1.1253769397735596, |
|
"logps/chosen": -622.0947265625, |
|
"logps/rejected": -1193.876220703125, |
|
"loss": 0.1058, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21242782473564148, |
|
"rewards/margins": 0.2279575765132904, |
|
"rewards/rejected": -0.4403854012489319, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0823023375489128e-08, |
|
"logits/chosen": -1.463275671005249, |
|
"logits/rejected": -0.7773112058639526, |
|
"logps/chosen": -759.4178466796875, |
|
"logps/rejected": -1502.75732421875, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.24037644267082214, |
|
"rewards/margins": 0.34176188707351685, |
|
"rewards/rejected": -0.5821383595466614, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.8317701816558838, |
|
"logits/rejected": -0.975128173828125, |
|
"logps/chosen": -716.7714233398438, |
|
"logps/rejected": -1325.99072265625, |
|
"loss": 0.0745, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19285240769386292, |
|
"rewards/margins": 0.2963181734085083, |
|
"rewards/rejected": -0.48917055130004883, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.9285359445145366e-09, |
|
"logits/chosen": -1.6910631656646729, |
|
"logits/rejected": -1.0203049182891846, |
|
"logps/chosen": -723.9457397460938, |
|
"logps/rejected": -1435.2896728515625, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.20491650700569153, |
|
"rewards/margins": 0.3260103464126587, |
|
"rewards/rejected": -0.5309268832206726, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.305234949880001e-09, |
|
"logits/chosen": -1.5100289583206177, |
|
"logits/rejected": -0.9951759576797485, |
|
"logps/chosen": -733.7612915039062, |
|
"logps/rejected": -1351.5802001953125, |
|
"loss": 0.0599, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2286415845155716, |
|
"rewards/margins": 0.2934814393520355, |
|
"rewards/rejected": -0.5221229791641235, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.6468843221664429, |
|
"logits/rejected": -0.8552689552307129, |
|
"logps/chosen": -657.6549072265625, |
|
"logps/rejected": -1334.9576416015625, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1897544115781784, |
|
"rewards/margins": 0.32274651527404785, |
|
"rewards/rejected": -0.5125009417533875, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7072216536885855e-09, |
|
"logits/chosen": -1.424248218536377, |
|
"logits/rejected": -0.8001385927200317, |
|
"logps/chosen": -590.4542846679688, |
|
"logps/rejected": -1241.249267578125, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16355466842651367, |
|
"rewards/margins": 0.31166428327560425, |
|
"rewards/rejected": -0.47521892189979553, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.7327344598702667e-09, |
|
"logits/chosen": -1.6445577144622803, |
|
"logits/rejected": -0.9098326563835144, |
|
"logps/chosen": -725.5911865234375, |
|
"logps/rejected": -1269.808349609375, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19787754118442535, |
|
"rewards/margins": 0.27285638451576233, |
|
"rewards/rejected": -0.47073397040367126, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -1.4280940294265747, |
|
"logits/rejected": -0.8096100687980652, |
|
"logps/chosen": -702.1490478515625, |
|
"logps/rejected": -1242.6480712890625, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1968768984079361, |
|
"rewards/margins": 0.2870542109012604, |
|
"rewards/rejected": -0.48393112421035767, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.332211510807427e-10, |
|
"logits/chosen": -1.5423029661178589, |
|
"logits/rejected": -1.002629041671753, |
|
"logps/chosen": -668.7158813476562, |
|
"logps/rejected": -1202.1529541015625, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20696516335010529, |
|
"rewards/margins": 0.26598578691482544, |
|
"rewards/rejected": -0.47295087575912476, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0830763387897902e-10, |
|
"logits/chosen": -1.3063008785247803, |
|
"logits/rejected": -1.1998459100723267, |
|
"logps/chosen": -585.5635375976562, |
|
"logps/rejected": -1489.9176025390625, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20169933140277863, |
|
"rewards/margins": 0.3584030270576477, |
|
"rewards/rejected": -0.5601023435592651, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.7910579442977905, |
|
"logits/rejected": -1.1715670824050903, |
|
"logps/chosen": -708.5948486328125, |
|
"logps/rejected": -1328.570556640625, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21266476809978485, |
|
"rewards/margins": 0.2909637689590454, |
|
"rewards/rejected": -0.5036285519599915, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3750, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0801518168369929, |
|
"train_runtime": 15780.2418, |
|
"train_samples_per_second": 0.951, |
|
"train_steps_per_second": 0.238 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|