|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9951497860199715, |
|
"eval_steps": 100, |
|
"global_step": 109, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.7272727272727274e-07, |
|
"logits/chosen": -2.779836893081665, |
|
"logits/rejected": -2.772892951965332, |
|
"logps/chosen": -67.39044952392578, |
|
"logps/rejected": -65.7892074584961, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"logits/chosen": -2.764448881149292, |
|
"logits/rejected": -2.7583670616149902, |
|
"logps/chosen": -63.044189453125, |
|
"logps/rejected": -65.00924682617188, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.6076388955116272, |
|
"rewards/chosen": 0.011253755539655685, |
|
"rewards/margins": 0.020065149292349815, |
|
"rewards/rejected": -0.008811394684016705, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.7244897959183674e-06, |
|
"logits/chosen": -2.7638134956359863, |
|
"logits/rejected": -2.7512309551239014, |
|
"logps/chosen": -60.88775634765625, |
|
"logps/rejected": -64.6989517211914, |
|
"loss": 0.5807, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": 0.074214868247509, |
|
"rewards/margins": 0.26364782452583313, |
|
"rewards/rejected": -0.18943293392658234, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.4183673469387754e-06, |
|
"logits/chosen": -2.7466235160827637, |
|
"logits/rejected": -2.7364418506622314, |
|
"logps/chosen": -62.1016845703125, |
|
"logps/rejected": -73.9842529296875, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.031027918681502342, |
|
"rewards/margins": 0.7247552275657654, |
|
"rewards/rejected": -0.6937273740768433, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.112244897959184e-06, |
|
"logits/chosen": -2.720366954803467, |
|
"logits/rejected": -2.707810401916504, |
|
"logps/chosen": -64.14788818359375, |
|
"logps/rejected": -80.15257263183594, |
|
"loss": 0.3545, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -0.217222660779953, |
|
"rewards/margins": 1.13102388381958, |
|
"rewards/rejected": -1.348246693611145, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.806122448979592e-06, |
|
"logits/chosen": -2.703882932662964, |
|
"logits/rejected": -2.688373327255249, |
|
"logps/chosen": -68.16423034667969, |
|
"logps/rejected": -81.43087005615234, |
|
"loss": 0.3228, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -0.3540056347846985, |
|
"rewards/margins": 1.3236706256866455, |
|
"rewards/rejected": -1.6776764392852783, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": -2.6891207695007324, |
|
"logits/rejected": -2.6766979694366455, |
|
"logps/chosen": -65.70265197753906, |
|
"logps/rejected": -84.64549255371094, |
|
"loss": 0.3174, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.44440922141075134, |
|
"rewards/margins": 1.486973762512207, |
|
"rewards/rejected": -1.9313831329345703, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.193877551020408e-06, |
|
"logits/chosen": -2.696516275405884, |
|
"logits/rejected": -2.6814627647399902, |
|
"logps/chosen": -68.18341827392578, |
|
"logps/rejected": -88.01350402832031, |
|
"loss": 0.2817, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -0.6013726592063904, |
|
"rewards/margins": 1.7021477222442627, |
|
"rewards/rejected": -2.303520441055298, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.877551020408164e-07, |
|
"logits/chosen": -2.680356025695801, |
|
"logits/rejected": -2.662485122680664, |
|
"logps/chosen": -69.7511978149414, |
|
"logps/rejected": -89.8312759399414, |
|
"loss": 0.2427, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -0.5794776678085327, |
|
"rewards/margins": 1.9294792413711548, |
|
"rewards/rejected": -2.5089569091796875, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.816326530612245e-07, |
|
"logits/chosen": -2.6861064434051514, |
|
"logits/rejected": -2.6741652488708496, |
|
"logps/chosen": -71.44612884521484, |
|
"logps/rejected": -92.25032806396484, |
|
"loss": 0.257, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.6901184320449829, |
|
"rewards/margins": 1.9418220520019531, |
|
"rewards/rejected": -2.6319406032562256, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7551020408163265e-07, |
|
"logits/chosen": -2.6638271808624268, |
|
"logits/rejected": -2.6519863605499268, |
|
"logps/chosen": -72.42386627197266, |
|
"logps/rejected": -90.99058532714844, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 0.9281250238418579, |
|
"rewards/chosen": -0.7177737355232239, |
|
"rewards/margins": 2.0420150756835938, |
|
"rewards/rejected": -2.759788751602173, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.6727941036224365, |
|
"eval_logits/rejected": -2.6590042114257812, |
|
"eval_logps/chosen": -70.43208312988281, |
|
"eval_logps/rejected": -92.74579620361328, |
|
"eval_loss": 0.24523001909255981, |
|
"eval_rewards/accuracies": 0.913241982460022, |
|
"eval_rewards/chosen": -0.7311916947364807, |
|
"eval_rewards/margins": 2.0473380088806152, |
|
"eval_rewards/rejected": -2.778529405593872, |
|
"eval_runtime": 288.5183, |
|
"eval_samples_per_second": 3.036, |
|
"eval_steps_per_second": 3.036, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 109, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3616804119643815, |
|
"train_runtime": 2568.179, |
|
"train_samples_per_second": 1.365, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 109, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|