|
{ |
|
"best_metric": 0.5000000596046448, |
|
"best_model_checkpoint": "./zephyr/10-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.25-KTO_Experiment with a new tokenizer configuration for chat template of zephyr-2_max_steps-1470_batch_16_2024-04-10_ppid_9/checkpoint-100", |
|
"epoch": 1.3201320132013201, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 57.293792724609375, |
|
"kl": 0.03853478282690048, |
|
"learning_rate": 6.222222222222222e-05, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7078, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 112.50944519042969, |
|
"kl": 3.2648494243621826, |
|
"learning_rate": 0.00014666666666666666, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6966, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -413.6161193847656, |
|
"eval_logps/rejected": -362.2559509277344, |
|
"eval_loss": 0.5063381791114807, |
|
"eval_rewards/chosen": -13.412939071655273, |
|
"eval_rewards/margins": -1.0048810243606567, |
|
"eval_rewards/rejected": -12.408059120178223, |
|
"eval_runtime": 170.1826, |
|
"eval_samples_per_second": 2.057, |
|
"eval_steps_per_second": 0.517, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 19.94582748413086, |
|
"kl": 0.45922356843948364, |
|
"learning_rate": 0.00019887719298245616, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.5743, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 79.92957305908203, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001960701754385965, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6108, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.06103940308094025, |
|
"kl": 0.0, |
|
"learning_rate": 0.00019326315789473686, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.754, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2027.0018310546875, |
|
"eval_logps/rejected": -1697.82177734375, |
|
"eval_loss": 0.5000000596046448, |
|
"eval_rewards/chosen": -174.75149536132812, |
|
"eval_rewards/margins": -28.786863327026367, |
|
"eval_rewards/rejected": -145.96463012695312, |
|
"eval_runtime": 170.0562, |
|
"eval_samples_per_second": 2.058, |
|
"eval_steps_per_second": 0.517, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001904561403508772, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.95, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018764912280701756, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6274, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2237.81494140625, |
|
"eval_logps/rejected": -1889.774169921875, |
|
"eval_loss": 0.5, |
|
"eval_rewards/chosen": -195.83285522460938, |
|
"eval_rewards/margins": -30.672954559326172, |
|
"eval_rewards/rejected": -165.15989685058594, |
|
"eval_runtime": 169.8795, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 0.518, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.0001848421052631579, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6387, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00018203508771929826, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.8327, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00017922807017543862, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.642, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -2230.916259765625, |
|
"eval_logps/rejected": -1884.9520263671875, |
|
"eval_loss": 0.5000000596046448, |
|
"eval_rewards/chosen": -195.14297485351562, |
|
"eval_rewards/margins": -30.465293884277344, |
|
"eval_rewards/rejected": -164.67767333984375, |
|
"eval_runtime": 170.1489, |
|
"eval_samples_per_second": 2.057, |
|
"eval_steps_per_second": 0.517, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1470, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|