beamaia's picture
Training in progress, step 100, checkpoint
5b974ea verified
{
"best_metric": 0.47915583848953247,
"best_model_checkpoint": "./zephyr/10-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.27-KTO_Experiment with a new tokenizer configuration for chat template of zephyr-2_max_steps-147_batch_16_2024-04-10_ppid_9/checkpoint-100",
"epoch": 0.6600660066006601,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"grad_norm": 87.22119140625,
"kl": 0.0514632947742939,
"learning_rate": 1.8732394366197181e-06,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.7523,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 20
},
{
"epoch": 0.26,
"grad_norm": 75.8567123413086,
"kl": 0.3588915467262268,
"learning_rate": 1.591549295774648e-06,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.5807,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 40
},
{
"epoch": 0.33,
"eval_kl": 0.4327344000339508,
"eval_logps/chosen": -279.24505615234375,
"eval_logps/rejected": -238.83538818359375,
"eval_loss": 0.48886266350746155,
"eval_rewards/chosen": 0.024170810356736183,
"eval_rewards/margins": 0.09017277508974075,
"eval_rewards/rejected": -0.06600195914506912,
"eval_runtime": 169.6418,
"eval_samples_per_second": 2.063,
"eval_steps_per_second": 0.519,
"step": 50
},
{
"epoch": 0.4,
"grad_norm": 86.48399353027344,
"kl": 0.4763767719268799,
"learning_rate": 1.3098591549295774e-06,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.6521,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 60
},
{
"epoch": 0.53,
"grad_norm": 89.11936950683594,
"kl": 0.35531342029571533,
"learning_rate": 1.028169014084507e-06,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.7257,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 80
},
{
"epoch": 0.66,
"grad_norm": 83.82781219482422,
"kl": 0.20901036262512207,
"learning_rate": 7.464788732394366e-07,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.6536,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 100
},
{
"epoch": 0.66,
"eval_kl": 0.20153704285621643,
"eval_logps/chosen": -280.2303771972656,
"eval_logps/rejected": -240.73260498046875,
"eval_loss": 0.47915583848953247,
"eval_rewards/chosen": -0.07436122000217438,
"eval_rewards/margins": 0.18136295676231384,
"eval_rewards/rejected": -0.255724161863327,
"eval_runtime": 169.8539,
"eval_samples_per_second": 2.061,
"eval_steps_per_second": 0.518,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 147,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}