{ "epoch": 0.996510067114094, "eval_logits/chosen": -3.1117970943450928, "eval_logits/rejected": -3.3963212966918945, "eval_logps/chosen": -2103.136962890625, "eval_logps/rejected": -10148.9853515625, "eval_loss": 0.17164821922779083, "eval_rewards/accuracies": 0.9180327653884888, "eval_rewards/chosen": -2.3896186351776123, "eval_rewards/margins": 8.561161994934082, "eval_rewards/rejected": -10.950779914855957, "eval_runtime": 428.9453, "eval_samples": 1951, "eval_samples_per_second": 4.548, "eval_steps_per_second": 0.569, "total_flos": 0.0, "train_loss": 0.2600768296497649, "train_runtime": 7822.1359, "train_samples": 14897, "train_samples_per_second": 1.904, "train_steps_per_second": 0.015 }