{ "epoch": 0.9985553308292401, "eval_logits/chosen": -0.4112316370010376, "eval_logits/rejected": -0.38529691100120544, "eval_logps/chosen": -154.36468505859375, "eval_logps/rejected": -161.3667755126953, "eval_loss": 1.598533034324646, "eval_nll_loss": 0.42096075415611267, "eval_rewards/accuracies": 0.623913049697876, "eval_rewards/chosen": -15.436468124389648, "eval_rewards/margins": 0.7002089619636536, "eval_rewards/rejected": -16.13667869567871, "eval_runtime": 73.3622, "eval_samples": 1826, "eval_samples_per_second": 24.89, "eval_steps_per_second": 1.568, "total_flos": 0.0, "train_loss": 1.77929983039697, "train_runtime": 9807.604, "train_samples": 55376, "train_samples_per_second": 5.646, "train_steps_per_second": 0.044 }