llama3.1-cpo-full-0911 / eval_results.json
jbjeong91's picture
End of training
6c56647 verified
{
"epoch": 2.9956659924877203,
"eval_logits/chosen": -0.34079235792160034,
"eval_logits/rejected": -0.31415677070617676,
"eval_logps/chosen": -143.9447784423828,
"eval_logps/rejected": -155.83648681640625,
"eval_loss": 1.5984355211257935,
"eval_nll_loss": 0.3937048017978668,
"eval_rewards/accuracies": 0.6304348111152649,
"eval_rewards/chosen": -14.394478797912598,
"eval_rewards/margins": 1.1891697645187378,
"eval_rewards/rejected": -15.583648681640625,
"eval_runtime": 73.3871,
"eval_samples": 1826,
"eval_samples_per_second": 24.882,
"eval_steps_per_second": 1.567
}