|
{ |
|
"best_metric": 0.5, |
|
"best_model_checkpoint": "./zephyr/10-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.25-KTO_Experiment with a new tokenizer configuration for chat template of zephyr-2_max_steps-1470_batch_16_2024-04-10_ppid_9/checkpoint-300", |
|
"epoch": 7.260726072607261, |
|
"eval_steps": 50, |
|
"global_step": 1100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 57.293792724609375, |
|
"kl": 0.03853478282690048, |
|
"learning_rate": 6.222222222222222e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7078, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 112.50944519042969, |
|
"kl": 3.2648494243621826, |
|
"learning_rate": 0.00014666666666666666, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6966, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -413.6161193847656, |
|
"eval_logps/rejected": -362.2559509277344, |
|
"eval_loss": 0.5063381791114807, |
|
"eval_rewards/chosen": -13.412939071655273, |
|
"eval_rewards/margins": -1.0048810243606567, |
|
"eval_rewards/rejected": -12.408059120178223, |
|
"eval_runtime": 170.1826, |
|
"eval_samples_per_second": 2.057, |
|
"eval_steps_per_second": 0.517, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 19.94582748413086, |
|
"kl": 0.45922356843948364, |
|
"learning_rate": 0.00019887719298245616, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.5743, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 79.92957305908203, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001960701754385965, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6108, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.06103940308094025, |
|
"kl": 0.0, |
|
"learning_rate": 0.00019326315789473686, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.754, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2027.0018310546875, |
|
"eval_logps/rejected": -1697.82177734375, |
|
"eval_loss": 0.5000000596046448, |
|
"eval_rewards/chosen": -174.75149536132812, |
|
"eval_rewards/margins": -28.786863327026367, |
|
"eval_rewards/rejected": -145.96463012695312, |
|
"eval_runtime": 170.0562, |
|
"eval_samples_per_second": 2.058, |
|
"eval_steps_per_second": 0.517, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001904561403508772, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.95, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018764912280701756, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6274, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2237.81494140625, |
|
"eval_logps/rejected": -1889.774169921875, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.83285522460938, |
|
"eval_rewards/margins": -30.672954559326172, |
|
"eval_rewards/rejected": -165.15989685058594, |
|
"eval_runtime": 169.8795, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 0.518, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001848421052631579, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6387, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018203508771929826, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8327, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00017922807017543862, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.642, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2230.916259765625, |
|
"eval_logps/rejected": -1884.9520263671875, |
|
"eval_loss": 0.5000000596046448, |
|
"eval_rewards/chosen": -195.14297485351562, |
|
"eval_rewards/margins": -30.465293884277344, |
|
"eval_rewards/rejected": -164.67767333984375, |
|
"eval_runtime": 170.1489, |
|
"eval_samples_per_second": 2.057, |
|
"eval_steps_per_second": 0.517, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00017642105263157896, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7493, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001736140350877193, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6241, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2230.957275390625, |
|
"eval_logps/rejected": -1885.0225830078125, |
|
"eval_loss": 0.5000000596046448, |
|
"eval_rewards/chosen": -195.14706420898438, |
|
"eval_rewards/margins": -30.462318420410156, |
|
"eval_rewards/rejected": -164.68475341796875, |
|
"eval_runtime": 170.1092, |
|
"eval_samples_per_second": 2.058, |
|
"eval_steps_per_second": 0.517, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00017080701754385965, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.9621, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.000168, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7279, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016519298245614035, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7477, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2238.164306640625, |
|
"eval_logps/rejected": -1890.7996826171875, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.86773681640625, |
|
"eval_rewards/margins": -30.605329513549805, |
|
"eval_rewards/rejected": -165.26242065429688, |
|
"eval_runtime": 170.0647, |
|
"eval_samples_per_second": 2.058, |
|
"eval_steps_per_second": 0.517, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00016238596491228072, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7111, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015957894736842105, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8685, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2238.054931640625, |
|
"eval_logps/rejected": -1890.694580078125, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.85682678222656, |
|
"eval_rewards/margins": -30.604921340942383, |
|
"eval_rewards/rejected": -165.2519073486328, |
|
"eval_runtime": 170.0528, |
|
"eval_samples_per_second": 2.058, |
|
"eval_steps_per_second": 0.517, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015677192982456142, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6905, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015396491228070175, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.736, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00015115789473684211, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.693, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2237.827392578125, |
|
"eval_logps/rejected": -1890.5028076171875, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.83407592773438, |
|
"eval_rewards/margins": -30.601318359375, |
|
"eval_rewards/rejected": -165.23275756835938, |
|
"eval_runtime": 170.2445, |
|
"eval_samples_per_second": 2.056, |
|
"eval_steps_per_second": 0.517, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 8.788210266175156e-07, |
|
"kl": 0.0, |
|
"learning_rate": 0.00014835087719298245, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8652, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001455438596491228, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.686, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2237.722412109375, |
|
"eval_logps/rejected": -1890.4027099609375, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.82354736328125, |
|
"eval_rewards/margins": -30.600812911987305, |
|
"eval_rewards/rejected": -165.22274780273438, |
|
"eval_runtime": 170.3429, |
|
"eval_samples_per_second": 2.055, |
|
"eval_steps_per_second": 0.517, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00014273684210526318, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6858, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001399298245614035, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8479, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00013712280701754388, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6119, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2237.6083984375, |
|
"eval_logps/rejected": -1890.3140869140625, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.81216430664062, |
|
"eval_rewards/margins": -30.598268508911133, |
|
"eval_rewards/rejected": -165.21388244628906, |
|
"eval_runtime": 169.9488, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001343157894736842, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7107, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00013150877192982455, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.5902, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2237.56494140625, |
|
"eval_logps/rejected": -1890.3043212890625, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.80784606933594, |
|
"eval_rewards/margins": -30.59491539001465, |
|
"eval_rewards/rejected": -165.21290588378906, |
|
"eval_runtime": 169.9756, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001287017543859649, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.9042, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012589473684210527, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7268, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012308771929824564, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7106, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2241.97509765625, |
|
"eval_logps/rejected": -1893.87646484375, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -196.24884033203125, |
|
"eval_rewards/margins": -30.67871856689453, |
|
"eval_rewards/rejected": -165.57012939453125, |
|
"eval_runtime": 169.9427, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 0.518, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012028070175438597, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6829, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00011747368421052631, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8232, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2241.91552734375, |
|
"eval_logps/rejected": -1893.757080078125, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -196.24290466308594, |
|
"eval_rewards/margins": -30.684709548950195, |
|
"eval_rewards/rejected": -165.55816650390625, |
|
"eval_runtime": 169.9605, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00011466666666666667, |
|
"logps/chosen": -2123.240234375, |
|
"logps/rejected": NaN, |
|
"loss": 0.6315, |
|
"rewards/chosen": -188.09486389160156, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00011185964912280702, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7998, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00010905263157894738, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.5881, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2251.134033203125, |
|
"eval_logps/rejected": -1901.2047119140625, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.1647491455078, |
|
"eval_rewards/margins": -30.8618221282959, |
|
"eval_rewards/rejected": -166.30291748046875, |
|
"eval_runtime": 169.9704, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00010624561403508772, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8756, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00010343859649122807, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6156, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2250.90234375, |
|
"eval_logps/rejected": -1901.0179443359375, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.1415557861328, |
|
"eval_rewards/margins": -30.857322692871094, |
|
"eval_rewards/rejected": -166.28424072265625, |
|
"eval_runtime": 169.9616, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00010063157894736843, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7376, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 9.782456140350877e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7998, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 9.501754385964913e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6291, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2250.995849609375, |
|
"eval_logps/rejected": -1901.1036376953125, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.15087890625, |
|
"eval_rewards/margins": -30.858049392700195, |
|
"eval_rewards/rejected": -166.29283142089844, |
|
"eval_runtime": 169.941, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 0.518, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 9.221052631578948e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7167, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 8.940350877192983e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6285, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2251.08837890625, |
|
"eval_logps/rejected": -1901.1571044921875, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.16017150878906, |
|
"eval_rewards/margins": -30.86201286315918, |
|
"eval_rewards/rejected": -166.2981719970703, |
|
"eval_runtime": 169.9583, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 8.659649122807018e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7898, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 8.378947368421053e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8174, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 8.098245614035088e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6918, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2251.1103515625, |
|
"eval_logps/rejected": -1901.1773681640625, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.16233825683594, |
|
"eval_rewards/margins": -30.86213493347168, |
|
"eval_rewards/rejected": -166.30018615722656, |
|
"eval_runtime": 170.0642, |
|
"eval_samples_per_second": 2.058, |
|
"eval_steps_per_second": 0.517, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 7.817543859649124e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6965, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 7.536842105263158e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7869, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2251.116943359375, |
|
"eval_logps/rejected": -1901.21484375, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.16302490234375, |
|
"eval_rewards/margins": -30.85906982421875, |
|
"eval_rewards/rejected": -166.303955078125, |
|
"eval_runtime": 169.9373, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 0.518, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 7.256140350877193e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6402, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 6.975438596491229e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8122, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 6.694736842105264e-05, |
|
"logps/chosen": -2150.89111328125, |
|
"logps/rejected": NaN, |
|
"loss": 0.5483, |
|
"rewards/chosen": -190.25399780273438, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2251.134521484375, |
|
"eval_logps/rejected": -1901.1729736328125, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.16481018066406, |
|
"eval_rewards/margins": -30.865028381347656, |
|
"eval_rewards/rejected": -166.29977416992188, |
|
"eval_runtime": 169.9607, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 6.414035087719299e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 1.0998, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 6.133333333333334e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7744, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2254.820068359375, |
|
"eval_logps/rejected": -1904.1441650390625, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.53334045410156, |
|
"eval_rewards/margins": -30.936431884765625, |
|
"eval_rewards/rejected": -166.59690856933594, |
|
"eval_runtime": 169.9328, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 0.518, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 5.852631578947369e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7891, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 5.571929824561404e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7203, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 5.291228070175439e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.9077, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2254.888427734375, |
|
"eval_logps/rejected": -1904.1827392578125, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -197.54017639160156, |
|
"eval_rewards/margins": -30.939420700073242, |
|
"eval_rewards/rejected": -166.6007537841797, |
|
"eval_runtime": 169.9818, |
|
"eval_samples_per_second": 2.059, |
|
"eval_steps_per_second": 0.518, |
|
"step": 1100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1470, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|