{ "epoch": 0.9932523616734144, "eval_logits/chosen": -2.2535200119018555, "eval_logits/rejected": -2.1303248405456543, "eval_logps/chosen": -415.7643127441406, "eval_logps/rejected": -524.5093994140625, "eval_loss": 0.5265253782272339, "eval_rewards/accuracies": 0.7674897313117981, "eval_rewards/chosen": -1.6185845136642456, "eval_rewards/margins": 0.9692981839179993, "eval_rewards/rejected": -2.5878827571868896, "eval_runtime": 274.1244, "eval_samples": 1943, "eval_samples_per_second": 7.088, "eval_steps_per_second": 0.886 }