{ "epoch": 0.9982631930527722, "eval_logits/chosen": -1.402356505393982, "eval_logits/rejected": -1.3987274169921875, "eval_logps/chosen": -6.294243812561035, "eval_logps/rejected": -8.588370323181152, "eval_loss": 0.34357374906539917, "eval_rewards/accuracies": 0.8983739614486694, "eval_rewards/chosen": -12.58848762512207, "eval_rewards/margins": 4.588252067565918, "eval_rewards/rejected": -17.176740646362305, "eval_runtime": 87.9156, "eval_samples": 1961, "eval_samples_per_second": 22.305, "eval_steps_per_second": 1.399 }