{ "epoch": 0.9922480620155039, "eval_logits/chosen": -2.135279655456543, "eval_logits/rejected": -2.0949225425720215, "eval_logps/chosen": -164.1279296875, "eval_logps/rejected": -183.2969970703125, "eval_loss": 0.6877921223640442, "eval_rewards/accuracies": 0.53125, "eval_rewards/chosen": -0.0802411139011383, "eval_rewards/margins": 0.007847035303711891, "eval_rewards/rejected": -0.08808814734220505, "eval_runtime": 2.3656, "eval_samples": 100, "eval_samples_per_second": 42.272, "eval_steps_per_second": 0.845 }