{ "epoch": 2.999214865218529, "eval_kl": 0.0, "eval_logits/chosen": -1429059328.0, "eval_logits/rejected": -1154032768.0, "eval_logps/chosen": -548.4809500247402, "eval_logps/rejected": -840.9109895574342, "eval_loss": 0.4489765763282776, "eval_rewards/chosen": -2.1658832261256804, "eval_rewards/margins": 3.285384799732102, "eval_rewards/rejected": -5.451268025857782, "eval_runtime": 93.4316, "eval_samples": 4000, "eval_samples_per_second": 42.812, "eval_steps_per_second": 0.674, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 0.0431, "train_samples": 122269, "train_samples_per_second": 8512981.493, "train_steps_per_second": 132983.787 }