{ "epoch": 3.0, "eval_logits/chosen": -2.715555429458618, "eval_logits/rejected": -2.6699323654174805, "eval_logps/chosen": -282.458740234375, "eval_logps/rejected": -204.27066040039062, "eval_loss": 0.3553008437156677, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -0.8621728420257568, "eval_rewards/margins": 2.261284112930298, "eval_rewards/rejected": -3.123457193374634, "eval_runtime": 259.9977, "eval_samples": 2000, "eval_samples_per_second": 7.692, "eval_steps_per_second": 0.062, "train_loss": 0.21351368668972423, "train_runtime": 46913.4477, "train_samples": 62064, "train_samples_per_second": 3.969, "train_steps_per_second": 0.031 }