{ "epoch": 0.9932523616734144, "eval_logits/chosen": -2.2535200119018555, "eval_logits/rejected": -2.1303248405456543, "eval_logps/chosen": -415.7643127441406, "eval_logps/rejected": -524.5093994140625, "eval_loss": 0.5265253782272339, "eval_rewards/accuracies": 0.7674897313117981, "eval_rewards/chosen": -1.6185845136642456, "eval_rewards/margins": 0.9692981839179993, "eval_rewards/rejected": -2.5878827571868896, "eval_runtime": 274.1244, "eval_samples": 1943, "eval_samples_per_second": 7.088, "eval_steps_per_second": 0.886, "total_flos": 0.0, "train_loss": 0.6000239335972329, "train_runtime": 5043.4745, "train_samples": 14820, "train_samples_per_second": 2.938, "train_steps_per_second": 0.023 }