SmolLM-360M-Instruct / eval_results.json
loubnabnl's picture
loubnabnl HF staff
End of training
eaf6d58 verified
raw
history blame
577 Bytes
{
"epoch": 0.985781990521327,
"eval_logits/chosen": -0.5211005210876465,
"eval_logits/rejected": -0.7105662226676941,
"eval_logps/chosen": -442.4708557128906,
"eval_logps/rejected": -418.5675354003906,
"eval_loss": 0.6833909749984741,
"eval_rewards/accuracies": 0.59375,
"eval_rewards/chosen": -0.008601006120443344,
"eval_rewards/margins": 0.021825773641467094,
"eval_rewards/rejected": -0.03042677976191044,
"eval_runtime": 8.5409,
"eval_samples": 750,
"eval_samples_per_second": 87.812,
"eval_steps_per_second": 2.81
}