{ "best_metric": 0.5443353056907654, "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.9-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 4.8340067863464355, "kl": 27.297277450561523, "learning_rate": 0.00018, "logps/chosen": -294.6091003417969, "logps/rejected": -256.2513732910156, "loss": 0.4863, "rewards/chosen": 1.882211446762085, "rewards/margins": 0.8332540392875671, "rewards/rejected": 0.9183141589164734, "step": 20 }, { "epoch": 0.27, "grad_norm": 0.0, "kl": 40.49668502807617, "learning_rate": 0.00015142857142857143, "logps/chosen": -539.7681884765625, "logps/rejected": -501.2303161621094, "loss": 0.4555, "rewards/chosen": -24.707237243652344, "rewards/margins": -0.5242304801940918, "rewards/rejected": -22.829710006713867, "step": 40 }, { "epoch": 0.34, "eval_kl": 0.0, "eval_logps/chosen": -1786.45947265625, "eval_logps/rejected": -1616.440185546875, "eval_loss": 0.5443361401557922, "eval_rewards/chosen": -148.88607788085938, "eval_rewards/margins": -20.042972564697266, "eval_rewards/rejected": -135.36317443847656, "eval_runtime": 138.4944, "eval_samples_per_second": 2.166, "eval_steps_per_second": 0.542, "step": 50 }, { "epoch": 0.41, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 0.00012285714285714287, "logps/chosen": -1740.7340087890625, "logps/rejected": -1820.6246337890625, "loss": 0.5427, "rewards/chosen": -146.22254943847656, "rewards/margins": 5.4553728103637695, "rewards/rejected": -151.97308349609375, "step": 60 }, { "epoch": 0.55, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 9.428571428571429e-05, "logps/chosen": -1711.9744873046875, "logps/rejected": -1807.9703369140625, "loss": 0.5427, "rewards/chosen": -143.12269592285156, "rewards/margins": 8.623757362365723, "rewards/rejected": -150.7401885986328, "step": 80 }, { "epoch": 0.68, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 6.571428571428571e-05, "logps/chosen": -1719.2364501953125, "logps/rejected": -1687.2681884765625, "loss": 0.5714, "rewards/chosen": -144.7313995361328, "rewards/margins": -4.064985752105713, "rewards/rejected": -140.0031280517578, "step": 100 }, { "epoch": 0.68, "eval_kl": 0.0, "eval_logps/chosen": -1605.7635498046875, "eval_logps/rejected": -1472.8948974609375, "eval_loss": 0.5443353056907654, "eval_rewards/chosen": -130.8164825439453, "eval_rewards/margins": -16.295190811157227, "eval_rewards/rejected": -121.00863647460938, "eval_runtime": 138.4851, "eval_samples_per_second": 2.166, "eval_steps_per_second": 0.542, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }