{ "epoch": 0.9985553308292401, "eval_logits/chosen": -0.4112316370010376, "eval_logits/rejected": -0.38529691100120544, "eval_logps/chosen": -154.36468505859375, "eval_logps/rejected": -161.3667755126953, "eval_loss": 1.598533034324646, "eval_nll_loss": 0.42096075415611267, "eval_rewards/accuracies": 0.623913049697876, "eval_rewards/chosen": -15.436468124389648, "eval_rewards/margins": 0.7002089619636536, "eval_rewards/rejected": -16.13667869567871, "eval_runtime": 73.3622, "eval_samples": 1826, "eval_samples_per_second": 24.89, "eval_steps_per_second": 1.568 }