{ "best_metric": 0.3329291045665741, "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.19-KTO_Experiment with a new tokenizer configuration for chat template of zephyr-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100", "epoch": 0.6462035541195477, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "grad_norm": 4.346810340881348, "kl": 0.026491880416870117, "learning_rate": 0.00018, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.7893, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 20 }, { "epoch": 0.26, "grad_norm": 1.5810322761535645, "kl": 5.172974109649658, "learning_rate": 0.00015142857142857143, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.571, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 40 }, { "epoch": 0.32, "eval_kl": 0.5043153166770935, "eval_logps/chosen": -259.60693359375, "eval_logps/rejected": -281.5754699707031, "eval_loss": 0.3644047975540161, "eval_rewards/chosen": -0.18218907713890076, "eval_rewards/margins": 4.18027400970459, "eval_rewards/rejected": -4.362462520599365, "eval_runtime": 147.5729, "eval_samples_per_second": 2.141, "eval_steps_per_second": 0.535, "step": 50 }, { "epoch": 0.39, "grad_norm": 3.2920711040496826, "kl": 0.34245628118515015, "learning_rate": 0.00012285714285714287, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.5075, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 60 }, { "epoch": 0.52, "grad_norm": 4.848718643188477, "kl": 2.158269166946411, "learning_rate": 9.428571428571429e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.4352, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 80 }, { "epoch": 0.65, "grad_norm": 5.877114295959473, "kl": 3.899824857711792, "learning_rate": 6.571428571428571e-05, "logps/chosen": NaN, "logps/rejected": NaN, "loss": 0.6882, "rewards/chosen": NaN, "rewards/margins": NaN, "rewards/rejected": NaN, "step": 100 }, { "epoch": 0.65, "eval_kl": 3.48748779296875, "eval_logps/chosen": -237.4883270263672, "eval_logps/rejected": -260.14056396484375, "eval_loss": 0.3329291045665741, "eval_rewards/chosen": 2.029669761657715, "eval_rewards/margins": 4.248642444610596, "eval_rewards/rejected": -2.218973159790039, "eval_runtime": 147.7764, "eval_samples_per_second": 2.138, "eval_steps_per_second": 0.535, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }