{ "epoch": 2.9956659924877203, "eval_logits/chosen": -0.34079235792160034, "eval_logits/rejected": -0.31415677070617676, "eval_logps/chosen": -143.9447784423828, "eval_logps/rejected": -155.83648681640625, "eval_loss": 1.5984355211257935, "eval_nll_loss": 0.3937048017978668, "eval_rewards/accuracies": 0.6304348111152649, "eval_rewards/chosen": -14.394478797912598, "eval_rewards/margins": 1.1891697645187378, "eval_rewards/rejected": -15.583648681640625, "eval_runtime": 73.3871, "eval_samples": 1826, "eval_samples_per_second": 24.882, "eval_steps_per_second": 1.567 }