{ "epoch": 2.9930715935334873, "eval_logits/chosen": -0.35042497515678406, "eval_logits/rejected": -0.3381310701370239, "eval_logps/chosen": -150.01190185546875, "eval_logps/rejected": -158.89979553222656, "eval_loss": 1.6688746213912964, "eval_nll_loss": 0.416054904460907, "eval_rewards/accuracies": 0.6336206793785095, "eval_rewards/chosen": -15.001189231872559, "eval_rewards/margins": 0.8887884616851807, "eval_rewards/rejected": -15.88998031616211, "eval_runtime": 41.8137, "eval_samples": 1826, "eval_samples_per_second": 43.67, "eval_steps_per_second": 0.694, "total_flos": 0.0, "train_loss": 1.6959601876176433, "train_runtime": 15481.5304, "train_samples": 55376, "train_samples_per_second": 10.731, "train_steps_per_second": 0.021 }