|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 32, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": 26.208711624145508, |
|
"debug/policy_chosen_logps": -419.32049560546875, |
|
"debug/policy_rejected_logits": 27.114166259765625, |
|
"debug/policy_rejected_logps": -409.2409362792969, |
|
"debug/reference_chosen_logps": -419.32049560546875, |
|
"debug/reference_rejected_logps": -409.2409362792969, |
|
"epoch": 0.03125, |
|
"grad_norm": 5.94402702532091, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.208711624145508, |
|
"logits/rejected": 27.114166259765625, |
|
"logps/chosen": -419.32049560546875, |
|
"logps/rejected": -409.2409362792969, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.364856719970703, |
|
"debug/policy_chosen_logps": -384.8145446777344, |
|
"debug/policy_rejected_logits": 30.614818572998047, |
|
"debug/policy_rejected_logps": -389.1242980957031, |
|
"debug/reference_chosen_logps": -385.05755615234375, |
|
"debug/reference_rejected_logps": -389.2853698730469, |
|
"epoch": 0.0625, |
|
"grad_norm": 5.403682848041972, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.364856719970703, |
|
"logits/rejected": 30.614818572998047, |
|
"logps/chosen": -384.8145446777344, |
|
"logps/rejected": -389.1242980957031, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.002430190797895193, |
|
"rewards/margins": 0.0008192063542082906, |
|
"rewards/rejected": 0.001610984792932868, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.064964294433594, |
|
"debug/policy_chosen_logps": -393.77032470703125, |
|
"debug/policy_rejected_logits": 24.79046058654785, |
|
"debug/policy_rejected_logps": -380.7741394042969, |
|
"debug/reference_chosen_logps": -393.5738525390625, |
|
"debug/reference_rejected_logps": -381.0361022949219, |
|
"epoch": 0.09375, |
|
"grad_norm": 5.494546355154901, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.064964294433594, |
|
"logits/rejected": 24.79046058654785, |
|
"logps/chosen": -393.77032470703125, |
|
"logps/rejected": -380.7741394042969, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0019646836444735527, |
|
"rewards/margins": -0.004584388807415962, |
|
"rewards/rejected": 0.0026197051629424095, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 28.537710189819336, |
|
"debug/policy_chosen_logps": -393.27496337890625, |
|
"debug/policy_rejected_logits": 26.111160278320312, |
|
"debug/policy_rejected_logps": -371.4323425292969, |
|
"debug/reference_chosen_logps": -394.78082275390625, |
|
"debug/reference_rejected_logps": -371.3717041015625, |
|
"epoch": 0.125, |
|
"grad_norm": 5.6289119594347605, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 28.537710189819336, |
|
"logits/rejected": 26.111160278320312, |
|
"logps/chosen": -393.27496337890625, |
|
"logps/rejected": -371.4323425292969, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.015058821067214012, |
|
"rewards/margins": 0.015665167942643166, |
|
"rewards/rejected": -0.0006063459441065788, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.205202102661133, |
|
"debug/policy_chosen_logps": -372.9059753417969, |
|
"debug/policy_rejected_logits": 26.36362075805664, |
|
"debug/policy_rejected_logps": -375.8589172363281, |
|
"debug/reference_chosen_logps": -373.8547668457031, |
|
"debug/reference_rejected_logps": -376.77655029296875, |
|
"epoch": 0.15625, |
|
"grad_norm": 4.945940952646883, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.205202102661133, |
|
"logits/rejected": 26.36362075805664, |
|
"logps/chosen": -372.9059753417969, |
|
"logps/rejected": -375.8589172363281, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.009487838484346867, |
|
"rewards/margins": 0.0003112029517069459, |
|
"rewards/rejected": 0.00917663611471653, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 26.221603393554688, |
|
"debug/policy_chosen_logps": -389.5379638671875, |
|
"debug/policy_rejected_logits": 28.28006362915039, |
|
"debug/policy_rejected_logps": -388.9604187011719, |
|
"debug/reference_chosen_logps": -390.29022216796875, |
|
"debug/reference_rejected_logps": -389.8282775878906, |
|
"epoch": 0.1875, |
|
"grad_norm": 5.296809968342226, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.221603393554688, |
|
"logits/rejected": 28.28006362915039, |
|
"logps/chosen": -389.5379638671875, |
|
"logps/rejected": -388.9604187011719, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00752284936606884, |
|
"rewards/margins": -0.0011558537371456623, |
|
"rewards/rejected": 0.008678702637553215, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 26.66923713684082, |
|
"debug/policy_chosen_logps": -394.6316223144531, |
|
"debug/policy_rejected_logits": 25.254545211791992, |
|
"debug/policy_rejected_logps": -364.38299560546875, |
|
"debug/reference_chosen_logps": -395.24407958984375, |
|
"debug/reference_rejected_logps": -364.6360168457031, |
|
"epoch": 0.21875, |
|
"grad_norm": 5.136828721896042, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.66923713684082, |
|
"logits/rejected": 25.254545211791992, |
|
"logps/chosen": -394.6316223144531, |
|
"logps/rejected": -364.38299560546875, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.006124534644186497, |
|
"rewards/margins": 0.0035945128183811903, |
|
"rewards/rejected": 0.0025300215929746628, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 23.028860092163086, |
|
"debug/policy_chosen_logps": -385.67840576171875, |
|
"debug/policy_rejected_logits": 25.49799919128418, |
|
"debug/policy_rejected_logps": -404.09539794921875, |
|
"debug/reference_chosen_logps": -386.231201171875, |
|
"debug/reference_rejected_logps": -403.6939392089844, |
|
"epoch": 0.25, |
|
"grad_norm": 5.138563586224073, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 23.028860092163086, |
|
"logits/rejected": 25.49799919128418, |
|
"logps/chosen": -385.67840576171875, |
|
"logps/rejected": -404.09539794921875, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005527915433049202, |
|
"rewards/margins": 0.009542694315314293, |
|
"rewards/rejected": -0.004014777950942516, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.734926223754883, |
|
"debug/policy_chosen_logps": -377.7676696777344, |
|
"debug/policy_rejected_logits": 26.249759674072266, |
|
"debug/policy_rejected_logps": -390.58367919921875, |
|
"debug/reference_chosen_logps": -377.9134216308594, |
|
"debug/reference_rejected_logps": -389.7505798339844, |
|
"epoch": 0.28125, |
|
"grad_norm": 5.126237673188204, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.734926223754883, |
|
"logits/rejected": 26.249759674072266, |
|
"logps/chosen": -377.7676696777344, |
|
"logps/rejected": -390.58367919921875, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.001457519712857902, |
|
"rewards/margins": 0.009788626804947853, |
|
"rewards/rejected": -0.008331108838319778, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.97374725341797, |
|
"debug/policy_chosen_logps": -372.5227966308594, |
|
"debug/policy_rejected_logits": 26.68909454345703, |
|
"debug/policy_rejected_logps": -371.6484375, |
|
"debug/reference_chosen_logps": -371.6475830078125, |
|
"debug/reference_rejected_logps": -371.02789306640625, |
|
"epoch": 0.3125, |
|
"grad_norm": 5.826967853852101, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.97374725341797, |
|
"logits/rejected": 26.68909454345703, |
|
"logps/chosen": -372.5227966308594, |
|
"logps/rejected": -371.6484375, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.008752173744142056, |
|
"rewards/margins": -0.002546653850004077, |
|
"rewards/rejected": -0.0062055205926299095, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 25.17486572265625, |
|
"debug/policy_chosen_logps": -387.5612487792969, |
|
"debug/policy_rejected_logits": 25.225900650024414, |
|
"debug/policy_rejected_logps": -394.29156494140625, |
|
"debug/reference_chosen_logps": -389.58349609375, |
|
"debug/reference_rejected_logps": -395.47021484375, |
|
"epoch": 0.34375, |
|
"grad_norm": 5.487120274982643, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 25.17486572265625, |
|
"logits/rejected": 25.225900650024414, |
|
"logps/chosen": -387.5612487792969, |
|
"logps/rejected": -394.29156494140625, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.020222166553139687, |
|
"rewards/margins": 0.008435897529125214, |
|
"rewards/rejected": 0.011786269955337048, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 25.62350082397461, |
|
"debug/policy_chosen_logps": -391.43536376953125, |
|
"debug/policy_rejected_logits": 26.19980239868164, |
|
"debug/policy_rejected_logps": -385.09454345703125, |
|
"debug/reference_chosen_logps": -391.71038818359375, |
|
"debug/reference_rejected_logps": -385.83221435546875, |
|
"epoch": 0.375, |
|
"grad_norm": 5.260475370442769, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 25.62350082397461, |
|
"logits/rejected": 26.19980239868164, |
|
"logps/chosen": -391.43536376953125, |
|
"logps/rejected": -385.09454345703125, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.0027502821758389473, |
|
"rewards/margins": -0.004626387730240822, |
|
"rewards/rejected": 0.007376670837402344, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 26.92571449279785, |
|
"debug/policy_chosen_logps": -368.7294006347656, |
|
"debug/policy_rejected_logits": 27.75008201599121, |
|
"debug/policy_rejected_logps": -406.4366149902344, |
|
"debug/reference_chosen_logps": -368.6622619628906, |
|
"debug/reference_rejected_logps": -406.2249450683594, |
|
"epoch": 0.40625, |
|
"grad_norm": 4.7848487788456415, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.92571449279785, |
|
"logits/rejected": 27.75008201599121, |
|
"logps/chosen": -368.7294006347656, |
|
"logps/rejected": -406.4366149902344, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0006715008057653904, |
|
"rewards/margins": 0.0014451986644417048, |
|
"rewards/rejected": -0.002116698771715164, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 30.04401969909668, |
|
"debug/policy_chosen_logps": -375.1585998535156, |
|
"debug/policy_rejected_logits": 26.233510971069336, |
|
"debug/policy_rejected_logps": -391.10772705078125, |
|
"debug/reference_chosen_logps": -374.92877197265625, |
|
"debug/reference_rejected_logps": -390.80914306640625, |
|
"epoch": 0.4375, |
|
"grad_norm": 4.836397952988449, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 30.04401969909668, |
|
"logits/rejected": 26.233510971069336, |
|
"logps/chosen": -375.1585998535156, |
|
"logps/rejected": -391.10772705078125, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0022980873472988605, |
|
"rewards/margins": 0.0006880564615130424, |
|
"rewards/rejected": -0.0029861442744731903, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 26.359272003173828, |
|
"debug/policy_chosen_logps": -380.72393798828125, |
|
"debug/policy_rejected_logits": 25.454103469848633, |
|
"debug/policy_rejected_logps": -372.8114013671875, |
|
"debug/reference_chosen_logps": -381.0364990234375, |
|
"debug/reference_rejected_logps": -374.25555419921875, |
|
"epoch": 0.46875, |
|
"grad_norm": 5.802453188137246, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.359272003173828, |
|
"logits/rejected": 25.454103469848633, |
|
"logps/chosen": -380.72393798828125, |
|
"logps/rejected": -372.8114013671875, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0031254198402166367, |
|
"rewards/margins": -0.011316032148897648, |
|
"rewards/rejected": 0.01444145105779171, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.719757080078125, |
|
"debug/policy_chosen_logps": -387.31719970703125, |
|
"debug/policy_rejected_logits": 28.422988891601562, |
|
"debug/policy_rejected_logps": -394.9139404296875, |
|
"debug/reference_chosen_logps": -387.28955078125, |
|
"debug/reference_rejected_logps": -392.607666015625, |
|
"epoch": 0.5, |
|
"grad_norm": 5.284125486448873, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.719757080078125, |
|
"logits/rejected": 28.422988891601562, |
|
"logps/chosen": -387.31719970703125, |
|
"logps/rejected": -394.9139404296875, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.00027671828866004944, |
|
"rewards/margins": 0.022786367684602737, |
|
"rewards/rejected": -0.023063087835907936, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 26.237287521362305, |
|
"debug/policy_chosen_logps": -396.80377197265625, |
|
"debug/policy_rejected_logits": 26.251012802124023, |
|
"debug/policy_rejected_logps": -415.9134521484375, |
|
"debug/reference_chosen_logps": -396.8568115234375, |
|
"debug/reference_rejected_logps": -414.50372314453125, |
|
"epoch": 0.53125, |
|
"grad_norm": 5.422306895885996, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.237287521362305, |
|
"logits/rejected": 26.251012802124023, |
|
"logps/chosen": -396.80377197265625, |
|
"logps/rejected": -415.9134521484375, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0005303573561832309, |
|
"rewards/margins": 0.014627190306782722, |
|
"rewards/rejected": -0.014096831902861595, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.24745750427246, |
|
"debug/policy_chosen_logps": -400.6668701171875, |
|
"debug/policy_rejected_logits": 28.665220260620117, |
|
"debug/policy_rejected_logps": -404.18115234375, |
|
"debug/reference_chosen_logps": -400.91192626953125, |
|
"debug/reference_rejected_logps": -404.140625, |
|
"epoch": 0.5625, |
|
"grad_norm": 6.154342037709508, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.24745750427246, |
|
"logits/rejected": 28.665220260620117, |
|
"logps/chosen": -400.6668701171875, |
|
"logps/rejected": -404.18115234375, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0024504470638930798, |
|
"rewards/margins": 0.002855682745575905, |
|
"rewards/rejected": -0.0004052352160215378, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 26.074438095092773, |
|
"debug/policy_chosen_logps": -368.0001220703125, |
|
"debug/policy_rejected_logits": 28.75902557373047, |
|
"debug/policy_rejected_logps": -402.38555908203125, |
|
"debug/reference_chosen_logps": -369.5516357421875, |
|
"debug/reference_rejected_logps": -402.15142822265625, |
|
"epoch": 0.59375, |
|
"grad_norm": 4.942495765333941, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.074438095092773, |
|
"logits/rejected": 28.75902557373047, |
|
"logps/chosen": -368.0001220703125, |
|
"logps/rejected": -402.38555908203125, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.015514831990003586, |
|
"rewards/margins": 0.017856139689683914, |
|
"rewards/rejected": -0.002341308631002903, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 22.95450782775879, |
|
"debug/policy_chosen_logps": -387.22039794921875, |
|
"debug/policy_rejected_logits": 23.25604820251465, |
|
"debug/policy_rejected_logps": -407.40460205078125, |
|
"debug/reference_chosen_logps": -387.4991760253906, |
|
"debug/reference_rejected_logps": -406.79705810546875, |
|
"epoch": 0.625, |
|
"grad_norm": 5.841637379604238, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 22.95450782775879, |
|
"logits/rejected": 23.25604820251465, |
|
"logps/chosen": -387.22039794921875, |
|
"logps/rejected": -407.40460205078125, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0027874757070094347, |
|
"rewards/margins": 0.008862762711942196, |
|
"rewards/rejected": -0.00607528630644083, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 25.369287490844727, |
|
"debug/policy_chosen_logps": -384.18011474609375, |
|
"debug/policy_rejected_logits": 27.09587287902832, |
|
"debug/policy_rejected_logps": -374.6014709472656, |
|
"debug/reference_chosen_logps": -384.5386962890625, |
|
"debug/reference_rejected_logps": -374.75848388671875, |
|
"epoch": 0.65625, |
|
"grad_norm": 5.35593100802686, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 25.369287490844727, |
|
"logits/rejected": 27.09587287902832, |
|
"logps/chosen": -384.18011474609375, |
|
"logps/rejected": -374.6014709472656, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003585891332477331, |
|
"rewards/margins": 0.0020158004481345415, |
|
"rewards/rejected": 0.0015700910007581115, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 29.95963478088379, |
|
"debug/policy_chosen_logps": -412.9902648925781, |
|
"debug/policy_rejected_logits": 29.491188049316406, |
|
"debug/policy_rejected_logps": -388.817138671875, |
|
"debug/reference_chosen_logps": -413.6742248535156, |
|
"debug/reference_rejected_logps": -388.7025146484375, |
|
"epoch": 0.6875, |
|
"grad_norm": 5.750706618647552, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 29.95963478088379, |
|
"logits/rejected": 29.491188049316406, |
|
"logps/chosen": -412.9902648925781, |
|
"logps/rejected": -388.817138671875, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0068396758288145065, |
|
"rewards/margins": 0.007985686883330345, |
|
"rewards/rejected": -0.0011460117530077696, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 27.35663414001465, |
|
"debug/policy_chosen_logps": -381.54901123046875, |
|
"debug/policy_rejected_logits": 27.309879302978516, |
|
"debug/policy_rejected_logps": -408.41070556640625, |
|
"debug/reference_chosen_logps": -381.82025146484375, |
|
"debug/reference_rejected_logps": -406.42999267578125, |
|
"epoch": 0.71875, |
|
"grad_norm": 5.238622917805314, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 27.35663414001465, |
|
"logits/rejected": 27.309879302978516, |
|
"logps/chosen": -381.54901123046875, |
|
"logps/rejected": -408.41070556640625, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.002712326357141137, |
|
"rewards/margins": 0.022518998011946678, |
|
"rewards/rejected": -0.019806671887636185, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 24.55692481994629, |
|
"debug/policy_chosen_logps": -373.54644775390625, |
|
"debug/policy_rejected_logits": 22.0833740234375, |
|
"debug/policy_rejected_logps": -383.18634033203125, |
|
"debug/reference_chosen_logps": -373.8763732910156, |
|
"debug/reference_rejected_logps": -384.98699951171875, |
|
"epoch": 0.75, |
|
"grad_norm": 5.752704029646181, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 24.55692481994629, |
|
"logits/rejected": 22.0833740234375, |
|
"logps/chosen": -373.54644775390625, |
|
"logps/rejected": -383.18634033203125, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.00329933175817132, |
|
"rewards/margins": -0.014707411639392376, |
|
"rewards/rejected": 0.018006745725870132, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 24.850391387939453, |
|
"debug/policy_chosen_logps": -386.2474365234375, |
|
"debug/policy_rejected_logits": 29.567493438720703, |
|
"debug/policy_rejected_logps": -422.58990478515625, |
|
"debug/reference_chosen_logps": -386.5673828125, |
|
"debug/reference_rejected_logps": -422.57989501953125, |
|
"epoch": 0.78125, |
|
"grad_norm": 5.457291481304808, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 24.850391387939453, |
|
"logits/rejected": 29.567493438720703, |
|
"logps/chosen": -386.2474365234375, |
|
"logps/rejected": -422.58990478515625, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.003199271857738495, |
|
"rewards/margins": 0.0032989501487463713, |
|
"rewards/rejected": -9.96782910078764e-05, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 24.353761672973633, |
|
"debug/policy_chosen_logps": -365.70977783203125, |
|
"debug/policy_rejected_logits": 27.883697509765625, |
|
"debug/policy_rejected_logps": -387.9311828613281, |
|
"debug/reference_chosen_logps": -365.4722900390625, |
|
"debug/reference_rejected_logps": -386.3861083984375, |
|
"epoch": 0.8125, |
|
"grad_norm": 6.268906937225549, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 24.353761672973633, |
|
"logits/rejected": 27.883697509765625, |
|
"logps/chosen": -365.70977783203125, |
|
"logps/rejected": -387.9311828613281, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0023751831613481045, |
|
"rewards/margins": 0.013075370341539383, |
|
"rewards/rejected": -0.0154505530372262, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 29.132570266723633, |
|
"debug/policy_chosen_logps": -383.54681396484375, |
|
"debug/policy_rejected_logits": 30.820039749145508, |
|
"debug/policy_rejected_logps": -391.74847412109375, |
|
"debug/reference_chosen_logps": -384.86737060546875, |
|
"debug/reference_rejected_logps": -390.72613525390625, |
|
"epoch": 0.84375, |
|
"grad_norm": 5.319109456599871, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 29.132570266723633, |
|
"logits/rejected": 30.820039749145508, |
|
"logps/chosen": -383.54681396484375, |
|
"logps/rejected": -391.74847412109375, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.013205718249082565, |
|
"rewards/margins": 0.023428915068507195, |
|
"rewards/rejected": -0.010223197750747204, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 21.42544174194336, |
|
"debug/policy_chosen_logps": -359.3302917480469, |
|
"debug/policy_rejected_logits": 23.681625366210938, |
|
"debug/policy_rejected_logps": -420.1634826660156, |
|
"debug/reference_chosen_logps": -360.5409240722656, |
|
"debug/reference_rejected_logps": -418.49676513671875, |
|
"epoch": 0.875, |
|
"grad_norm": 5.766637040961972, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 21.42544174194336, |
|
"logits/rejected": 23.681625366210938, |
|
"logps/chosen": -359.3302917480469, |
|
"logps/rejected": -420.1634826660156, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.012106247246265411, |
|
"rewards/margins": 0.028773421421647072, |
|
"rewards/rejected": -0.01666717603802681, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 23.576292037963867, |
|
"debug/policy_chosen_logps": -389.7811279296875, |
|
"debug/policy_rejected_logits": 25.178329467773438, |
|
"debug/policy_rejected_logps": -380.7835998535156, |
|
"debug/reference_chosen_logps": -388.56658935546875, |
|
"debug/reference_rejected_logps": -380.28936767578125, |
|
"epoch": 0.90625, |
|
"grad_norm": 5.149123243810572, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 23.576292037963867, |
|
"logits/rejected": 25.178329467773438, |
|
"logps/chosen": -389.7811279296875, |
|
"logps/rejected": -380.7835998535156, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.01214546151459217, |
|
"rewards/margins": -0.007203062996268272, |
|
"rewards/rejected": -0.004942398518323898, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 25.0103702545166, |
|
"debug/policy_chosen_logps": -374.01544189453125, |
|
"debug/policy_rejected_logits": 24.813074111938477, |
|
"debug/policy_rejected_logps": -402.4690246582031, |
|
"debug/reference_chosen_logps": -373.6084899902344, |
|
"debug/reference_rejected_logps": -400.3970947265625, |
|
"epoch": 0.9375, |
|
"grad_norm": 5.054574585698812, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 25.0103702545166, |
|
"logits/rejected": 24.813074111938477, |
|
"logps/chosen": -374.01544189453125, |
|
"logps/rejected": -402.4690246582031, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.00406951829791069, |
|
"rewards/margins": 0.016650084406137466, |
|
"rewards/rejected": -0.020719602704048157, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 24.653635025024414, |
|
"debug/policy_chosen_logps": -389.5201721191406, |
|
"debug/policy_rejected_logits": 26.097084045410156, |
|
"debug/policy_rejected_logps": -385.3985290527344, |
|
"debug/reference_chosen_logps": -388.95758056640625, |
|
"debug/reference_rejected_logps": -384.1873779296875, |
|
"epoch": 0.96875, |
|
"grad_norm": 5.42566873511884, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 24.653635025024414, |
|
"logits/rejected": 26.097084045410156, |
|
"logps/chosen": -389.5201721191406, |
|
"logps/rejected": -385.3985290527344, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.005625876598060131, |
|
"rewards/margins": 0.006485518999397755, |
|
"rewards/rejected": -0.012111397460103035, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": 26.803712844848633, |
|
"debug/policy_chosen_logps": -409.23480224609375, |
|
"debug/policy_rejected_logits": 27.91266632080078, |
|
"debug/policy_rejected_logps": -441.89007568359375, |
|
"debug/reference_chosen_logps": -409.35791015625, |
|
"debug/reference_rejected_logps": -439.5430908203125, |
|
"epoch": 1.0, |
|
"grad_norm": 5.122199540354154, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 26.803712844848633, |
|
"logits/rejected": 27.91266632080078, |
|
"logps/chosen": -409.23480224609375, |
|
"logps/rejected": -441.89007568359375, |
|
"loss": 0.4517, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0012308494187891483, |
|
"rewards/margins": 0.024701077491044998, |
|
"rewards/rejected": -0.023470228537917137, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 32, |
|
"total_flos": 0.0, |
|
"train_loss": 0.49607059359550476, |
|
"train_runtime": 386.5625, |
|
"train_samples_per_second": 5.161, |
|
"train_steps_per_second": 0.083 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 32, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|