llama3.1-cpo-full-0913 / eval_results.json
jbjeong91's picture
End of training
d9a3a92 verified
raw
history blame
625 Bytes
{
"epoch": 0.9985553308292401,
"eval_logits/chosen": -0.5159785151481628,
"eval_logits/rejected": -0.4926067888736725,
"eval_logps/chosen": -154.935546875,
"eval_logps/rejected": -162.19007873535156,
"eval_loss": 1.5933648347854614,
"eval_nll_loss": 0.42284756898880005,
"eval_rewards/accuracies": 0.626086950302124,
"eval_rewards/chosen": -15.493555068969727,
"eval_rewards/margins": 0.7254539132118225,
"eval_rewards/rejected": -16.219009399414062,
"eval_runtime": 73.3207,
"eval_samples": 1826,
"eval_samples_per_second": 24.904,
"eval_steps_per_second": 1.568
}