|
{ |
|
"best_metric": 0.47915583848953247, |
|
"best_model_checkpoint": "./zephyr/10-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.27-KTO_Experiment with a new tokenizer configuration for chat template of zephyr-2_max_steps-147_batch_16_2024-04-10_ppid_9/checkpoint-100", |
|
"epoch": 0.6600660066006601, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 87.22119140625, |
|
"kl": 0.0514632947742939, |
|
"learning_rate": 1.8732394366197181e-06, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7523, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 75.8567123413086, |
|
"kl": 0.3588915467262268, |
|
"learning_rate": 1.591549295774648e-06, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.5807, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_kl": 0.4327344000339508, |
|
"eval_logps/chosen": -279.24505615234375, |
|
"eval_logps/rejected": -238.83538818359375, |
|
"eval_loss": 0.48886266350746155, |
|
"eval_rewards/chosen": 0.024170810356736183, |
|
"eval_rewards/margins": 0.09017277508974075, |
|
"eval_rewards/rejected": -0.06600195914506912, |
|
"eval_runtime": 169.6418, |
|
"eval_samples_per_second": 2.063, |
|
"eval_steps_per_second": 0.519, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 86.48399353027344, |
|
"kl": 0.4763767719268799, |
|
"learning_rate": 1.3098591549295774e-06, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6521, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 89.11936950683594, |
|
"kl": 0.35531342029571533, |
|
"learning_rate": 1.028169014084507e-06, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.7257, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 83.82781219482422, |
|
"kl": 0.20901036262512207, |
|
"learning_rate": 7.464788732394366e-07, |
|
"logps/chosen": NaN, |
|
"logps/rejected": NaN, |
|
"loss": 0.6536, |
|
"rewards/chosen": NaN, |
|
"rewards/margins": NaN, |
|
"rewards/rejected": NaN, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_kl": 0.20153704285621643, |
|
"eval_logps/chosen": -280.2303771972656, |
|
"eval_logps/rejected": -240.73260498046875, |
|
"eval_loss": 0.47915583848953247, |
|
"eval_rewards/chosen": -0.07436122000217438, |
|
"eval_rewards/margins": 0.18136295676231384, |
|
"eval_rewards/rejected": -0.255724161863327, |
|
"eval_runtime": 169.8539, |
|
"eval_samples_per_second": 2.061, |
|
"eval_steps_per_second": 0.518, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 147, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|