{ "epoch": 0.9982631930527722, "eval_logits/chosen": -1.3516209125518799, "eval_logits/rejected": -1.361035943031311, "eval_logps/chosen": -1.7249912023544312, "eval_logps/rejected": -2.307142972946167, "eval_loss": 1.355675220489502, "eval_rewards/accuracies": 0.8455284833908081, "eval_rewards/chosen": -17.24991226196289, "eval_rewards/margins": 5.821517467498779, "eval_rewards/rejected": -23.071430206298828, "eval_runtime": 87.7742, "eval_samples": 1961, "eval_samples_per_second": 22.341, "eval_steps_per_second": 1.401 }