{ "epoch": 1.9936102236421727, "eval_log_odds_chosen": 1.0634132623672485, "eval_log_odds_ratio": -0.421150267124176, "eval_logits/chosen": 35.52544021606445, "eval_logits/rejected": 34.42332077026367, "eval_logps/chosen": -0.3376733958721161, "eval_logps/rejected": -0.8398498296737671, "eval_loss": 0.6817505359649658, "eval_nll_loss": 0.6413611173629761, "eval_rewards/accuracies": 0.8088235259056091, "eval_rewards/chosen": -0.033767346292734146, "eval_rewards/margins": 0.0502176471054554, "eval_rewards/rejected": -0.08398497849702835, "eval_runtime": 252.7054, "eval_samples": 5398, "eval_samples_per_second": 21.361, "eval_steps_per_second": 0.336, "total_flos": 0.0, "train_loss": 0.7846963420892373, "train_runtime": 5577.3844, "train_samples": 20000, "train_samples_per_second": 7.172, "train_steps_per_second": 0.056 }