{ "epoch": 0.9947089947089947, "eval_logits/chosen": -2.934617280960083, "eval_logits/rejected": -3.0282351970672607, "eval_logps/chosen": -524.4794921875, "eval_logps/rejected": -741.4998779296875, "eval_loss": 0.1684425175189972, "eval_rewards/accuracies": 0.9375, "eval_rewards/chosen": -0.7677375078201294, "eval_rewards/margins": 1.8244943618774414, "eval_rewards/rejected": -2.5922317504882812, "eval_runtime": 1.3947, "eval_samples": 100, "eval_samples_per_second": 71.701, "eval_steps_per_second": 0.717, "total_flos": 0.0, "train_loss": 0.5087684798747936, "train_runtime": 2521.2346, "train_samples": 96652, "train_samples_per_second": 38.335, "train_steps_per_second": 0.037 }