SmolLM-360M-Instruct / all_results.json
loubnabnl's picture
loubnabnl HF staff
End of training
eaf6d58 verified
raw
history blame
772 Bytes
{
"epoch": 0.985781990521327,
"eval_logits/chosen": -0.5211005210876465,
"eval_logits/rejected": -0.7105662226676941,
"eval_logps/chosen": -442.4708557128906,
"eval_logps/rejected": -418.5675354003906,
"eval_loss": 0.6833909749984741,
"eval_rewards/accuracies": 0.59375,
"eval_rewards/chosen": -0.008601006120443344,
"eval_rewards/margins": 0.021825773641467094,
"eval_rewards/rejected": -0.03042677976191044,
"eval_runtime": 8.5409,
"eval_samples": 750,
"eval_samples_per_second": 87.812,
"eval_steps_per_second": 2.81,
"total_flos": 0.0,
"train_loss": 0.6860739244864538,
"train_runtime": 316.322,
"train_samples": 6750,
"train_samples_per_second": 21.339,
"train_steps_per_second": 0.164
}