{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9951497860199715, "eval_steps": 100, "global_step": 109, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.7272727272727274e-07, "logits/chosen": -2.779836893081665, "logits/rejected": -2.772892951965332, "logps/chosen": -67.39044952392578, "logps/rejected": -65.7892074584961, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.09, "learning_rate": 2.7272727272727272e-06, "logits/chosen": -2.764448881149292, "logits/rejected": -2.7583670616149902, "logps/chosen": -63.044189453125, "logps/rejected": -65.00924682617188, "loss": 0.6834, "rewards/accuracies": 0.6076388955116272, "rewards/chosen": 0.011253755539655685, "rewards/margins": 0.020065149292349815, "rewards/rejected": -0.008811394684016705, "step": 10 }, { "epoch": 0.18, "learning_rate": 2.7244897959183674e-06, "logits/chosen": -2.7638134956359863, "logits/rejected": -2.7512309551239014, "logps/chosen": -60.88775634765625, "logps/rejected": -64.6989517211914, "loss": 0.5807, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": 0.074214868247509, "rewards/margins": 0.26364782452583313, "rewards/rejected": -0.18943293392658234, "step": 20 }, { "epoch": 0.27, "learning_rate": 2.4183673469387754e-06, "logits/chosen": -2.7466235160827637, "logits/rejected": -2.7364418506622314, "logps/chosen": -62.1016845703125, "logps/rejected": -73.9842529296875, "loss": 0.4425, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": 0.031027918681502342, "rewards/margins": 0.7247552275657654, "rewards/rejected": -0.6937273740768433, "step": 30 }, { "epoch": 0.37, "learning_rate": 2.112244897959184e-06, "logits/chosen": -2.720366954803467, "logits/rejected": -2.707810401916504, "logps/chosen": -64.14788818359375, "logps/rejected": -80.15257263183594, "loss": 0.3545, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -0.217222660779953, "rewards/margins": 1.13102388381958, "rewards/rejected": -1.348246693611145, "step": 40 }, { "epoch": 0.46, "learning_rate": 1.806122448979592e-06, "logits/chosen": -2.703882932662964, "logits/rejected": -2.688373327255249, "logps/chosen": -68.16423034667969, "logps/rejected": -81.43087005615234, "loss": 0.3228, "rewards/accuracies": 0.903124988079071, "rewards/chosen": -0.3540056347846985, "rewards/margins": 1.3236706256866455, "rewards/rejected": -1.6776764392852783, "step": 50 }, { "epoch": 0.55, "learning_rate": 1.5e-06, "logits/chosen": -2.6891207695007324, "logits/rejected": -2.6766979694366455, "logps/chosen": -65.70265197753906, "logps/rejected": -84.64549255371094, "loss": 0.3174, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.44440922141075134, "rewards/margins": 1.486973762512207, "rewards/rejected": -1.9313831329345703, "step": 60 }, { "epoch": 0.64, "learning_rate": 1.193877551020408e-06, "logits/chosen": -2.696516275405884, "logits/rejected": -2.6814627647399902, "logps/chosen": -68.18341827392578, "logps/rejected": -88.01350402832031, "loss": 0.2817, "rewards/accuracies": 0.903124988079071, "rewards/chosen": -0.6013726592063904, "rewards/margins": 1.7021477222442627, "rewards/rejected": -2.303520441055298, "step": 70 }, { "epoch": 0.73, "learning_rate": 8.877551020408164e-07, "logits/chosen": -2.680356025695801, "logits/rejected": -2.662485122680664, "logps/chosen": -69.7511978149414, "logps/rejected": -89.8312759399414, "loss": 0.2427, "rewards/accuracies": 0.921875, "rewards/chosen": -0.5794776678085327, "rewards/margins": 1.9294792413711548, "rewards/rejected": -2.5089569091796875, "step": 80 }, { "epoch": 0.82, "learning_rate": 5.816326530612245e-07, "logits/chosen": -2.6861064434051514, "logits/rejected": -2.6741652488708496, "logps/chosen": -71.44612884521484, "logps/rejected": -92.25032806396484, "loss": 0.257, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.6901184320449829, "rewards/margins": 1.9418220520019531, "rewards/rejected": -2.6319406032562256, "step": 90 }, { "epoch": 0.91, "learning_rate": 2.7551020408163265e-07, "logits/chosen": -2.6638271808624268, "logits/rejected": -2.6519863605499268, "logps/chosen": -72.42386627197266, "logps/rejected": -90.99058532714844, "loss": 0.2434, "rewards/accuracies": 0.9281250238418579, "rewards/chosen": -0.7177737355232239, "rewards/margins": 2.0420150756835938, "rewards/rejected": -2.759788751602173, "step": 100 }, { "epoch": 1.0, "eval_logits/chosen": -2.6727941036224365, "eval_logits/rejected": -2.6590042114257812, "eval_logps/chosen": -70.43208312988281, "eval_logps/rejected": -92.74579620361328, "eval_loss": 0.24523001909255981, "eval_rewards/accuracies": 0.913241982460022, "eval_rewards/chosen": -0.7311916947364807, "eval_rewards/margins": 2.0473380088806152, "eval_rewards/rejected": -2.778529405593872, "eval_runtime": 288.5183, "eval_samples_per_second": 3.036, "eval_steps_per_second": 3.036, "step": 109 }, { "epoch": 1.0, "step": 109, "total_flos": 0.0, "train_loss": 0.3616804119643815, "train_runtime": 2568.179, "train_samples_per_second": 1.365, "train_steps_per_second": 0.042 } ], "logging_steps": 10, "max_steps": 109, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }