NicholasCorrado's picture
End of training
87a6020 verified
raw
history blame contribute delete
770 Bytes
{
"epoch": 0.9922480620155039,
"eval_logits/chosen": -2.135279655456543,
"eval_logits/rejected": -2.0949225425720215,
"eval_logps/chosen": -164.1279296875,
"eval_logps/rejected": -183.2969970703125,
"eval_loss": 0.6877921223640442,
"eval_rewards/accuracies": 0.53125,
"eval_rewards/chosen": -0.0802411139011383,
"eval_rewards/margins": 0.007847035303711891,
"eval_rewards/rejected": -0.08808814734220505,
"eval_runtime": 2.3656,
"eval_samples": 100,
"eval_samples_per_second": 42.272,
"eval_steps_per_second": 0.845,
"total_flos": 0.0,
"train_loss": 0.6876659728586674,
"train_runtime": 1604.6168,
"train_samples": 33000,
"train_samples_per_second": 20.566,
"train_steps_per_second": 0.04
}