{ "epoch": 1.0, "eval_logits/chosen": -3.034407377243042, "eval_logits/rejected": -3.069913864135742, "eval_logps/chosen": -271.40020751953125, "eval_logps/rejected": -175.5244140625, "eval_loss": 0.5650191903114319, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": 0.08157022297382355, "eval_rewards/margins": 0.33799096941947937, "eval_rewards/rejected": -0.25642073154449463, "eval_runtime": 254.1285, "eval_samples": 2000, "eval_samples_per_second": 7.87, "eval_steps_per_second": 0.063, "train_loss": 0.5539181610972611, "train_runtime": 15602.6148, "train_samples": 62064, "train_samples_per_second": 3.978, "train_steps_per_second": 0.031 }