{ "best_metric": 0.5000000596046448, "best_model_checkpoint": "./zephyr/10-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.25-KTO_Experiment with a new tokenizer configuration for chat template of zephyr-2_max_steps-1470_batch_16_2024-04-10_ppid_9/checkpoint-100", "epoch": 1.3201320132013201, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "grad_norm": 57.293792724609375, "kl": 0.03853478282690048, "learning_rate": 6.222222222222222e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.7078, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 20 }, { "epoch": 0.26, "grad_norm": 112.50944519042969, "kl": 3.2648494243621826, "learning_rate": 0.00014666666666666666, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.6966, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 40 }, { "epoch": 0.33, "eval_kl": 0.0, "eval_logps/chosen": -413.6161193847656, "eval_logps/rejected": -362.2559509277344, "eval_loss": 0.5063381791114807, "eval_rewards/chosen": -13.412939071655273, "eval_rewards/margins": -1.0048810243606567, "eval_rewards/rejected": -12.408059120178223, "eval_runtime": 170.1826, "eval_samples_per_second": 2.057, "eval_steps_per_second": 0.517, "step": 50 }, { "epoch": 0.4, "grad_norm": 19.94582748413086, "kl": 0.45922356843948364, "learning_rate": 0.00019887719298245616, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.5743, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 60 }, { "epoch": 0.53, "grad_norm": 79.92957305908203, "kl": 0.0, "learning_rate": 0.0001960701754385965, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.6108, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 80 }, { "epoch": 0.66, "grad_norm": 0.06103940308094025, "kl": 0.0, "learning_rate": 0.00019326315789473686, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.754, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 100 }, { "epoch": 0.66, "eval_kl": 0.0, "eval_logps/chosen": -2027.0018310546875, "eval_logps/rejected": -1697.82177734375, "eval_loss": 0.5000000596046448, "eval_rewards/chosen": -174.75149536132812, "eval_rewards/margins": -28.786863327026367, "eval_rewards/rejected": -145.96463012695312, "eval_runtime": 170.0562, "eval_samples_per_second": 2.058, "eval_steps_per_second": 0.517, "step": 100 }, { "epoch": 0.79, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.0001904561403508772, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.95, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 120 }, { "epoch": 0.92, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.00018764912280701756, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.6274, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 140 }, { "epoch": 0.99, "eval_kl": 0.0, "eval_logps/chosen": -2237.81494140625, "eval_logps/rejected": -1889.774169921875, "eval_loss": 0.5, "eval_rewards/chosen": -195.83285522460938, "eval_rewards/margins": -30.672954559326172, "eval_rewards/rejected": -165.15989685058594, "eval_runtime": 169.8795, "eval_samples_per_second": 2.06, "eval_steps_per_second": 0.518, "step": 150 }, { "epoch": 1.06, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.0001848421052631579, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.6387, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 160 }, { "epoch": 1.19, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.00018203508771929826, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.8327, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 180 }, { "epoch": 1.32, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.00017922807017543862, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.642, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 200 }, { "epoch": 1.32, "eval_kl": 0.0, "eval_logps/chosen": -2230.916259765625, "eval_logps/rejected": -1884.9520263671875, "eval_loss": 0.5000000596046448, "eval_rewards/chosen": -195.14297485351562, "eval_rewards/margins": -30.465293884277344, "eval_rewards/rejected": -164.67767333984375, "eval_runtime": 170.1489, "eval_samples_per_second": 2.057, "eval_steps_per_second": 0.517, "step": 200 } ], "logging_steps": 20, "max_steps": 1470, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }