{ "best_metric": 0.4733425974845886, "best_model_checkpoint": "./zephyr/05-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.1-KTO_testing kto dataset during training-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 8.764093399047852, "kl": 11.000414848327637, "learning_rate": 0.00018285714285714286, "logps/chosen": -320.3092041015625, "logps/rejected": -296.3908386230469, "loss": 0.432, "rewards/chosen": 1.4254474639892578, "rewards/margins": 1.3692480325698853, "rewards/rejected": 0.0951162651181221, "step": 20 }, { "epoch": 0.27, "grad_norm": 5.878363609313965, "kl": 1.2673273086547852, "learning_rate": 0.0001542857142857143, "logps/chosen": -307.4684753417969, "logps/rejected": -375.4805603027344, "loss": 0.4081, "rewards/chosen": -1.0870614051818848, "rewards/margins": 3.670943021774292, "rewards/rejected": -4.587001800537109, "step": 40 }, { "epoch": 0.34, "eval_kl": 1.7102612257003784, "eval_logps/chosen": -316.2467346191406, "eval_logps/rejected": -332.64117431640625, "eval_loss": 0.38967660069465637, "eval_rewards/chosen": -0.9805464744567871, "eval_rewards/margins": 3.1516332626342773, "eval_rewards/rejected": -3.9925851821899414, "eval_runtime": 138.3084, "eval_samples_per_second": 2.169, "eval_steps_per_second": 0.542, "step": 50 }, { "epoch": 0.41, "grad_norm": 12.671459197998047, "kl": 2.424811840057373, "learning_rate": 0.00012714285714285714, "logps/chosen": -321.6191101074219, "logps/rejected": -360.3143310546875, "loss": 0.4136, "rewards/chosen": -0.7625396847724915, "rewards/margins": 2.869812250137329, "rewards/rejected": -3.8002796173095703, "step": 60 }, { "epoch": 0.55, "grad_norm": 6.753478050231934, "kl": 5.016882419586182, "learning_rate": 9.857142857142858e-05, "logps/chosen": -314.6625671386719, "logps/rejected": -389.6596374511719, "loss": 0.4034, "rewards/chosen": -1.544058084487915, "rewards/margins": 5.419744968414307, "rewards/rejected": -7.084101676940918, "step": 80 }, { "epoch": 0.68, "grad_norm": 0.0027679901104420424, "kl": 0.5703033208847046, "learning_rate": 7e-05, "logps/chosen": -532.3701171875, "logps/rejected": -549.9442749023438, "loss": 0.4304, "rewards/chosen": -21.35959243774414, "rewards/margins": 2.940380096435547, "rewards/rejected": -22.644678115844727, "step": 100 }, { "epoch": 0.68, "eval_kl": 0.0, "eval_logps/chosen": -1804.134765625, "eval_logps/rejected": -1664.4483642578125, "eval_loss": 0.4733425974845886, "eval_rewards/chosen": -149.7693634033203, "eval_rewards/margins": -9.425224304199219, "eval_rewards/rejected": -137.1732940673828, "eval_runtime": 138.3606, "eval_samples_per_second": 2.168, "eval_steps_per_second": 0.542, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }