{ "epoch": 1.0, "eval_logits/chosen": -2.0918362140655518, "eval_logits/rejected": -2.03544020652771, "eval_logps/chosen": -271.2979431152344, "eval_logps/rejected": -267.2720642089844, "eval_loss": 1721.1201171875, "eval_rewards/accuracies": 0.773809552192688, "eval_rewards/chosen": -0.06273359060287476, "eval_rewards/margins": 0.16227789223194122, "eval_rewards/rejected": -0.22501146793365479, "eval_runtime": 548.8776, "eval_samples": 2000, "eval_samples_per_second": 3.644, "eval_steps_per_second": 0.115 }