{ "best_metric": 0.35275527834892273, "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.18-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100", "epoch": 0.684931506849315, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 6.346695899963379, "learning_rate": 0.00018142857142857142, "loss": 0.467, "step": 20, "train/kl": 6.117425918579102, "train/logps/chosen": -259.1842447916667, "train/logps/rejected": -290.1948988970588, "train/rewards/chosen": 0.930299072265625, "train/rewards/margins": 0.5634524266860065, "train/rewards/rejected": 0.36684664557961855 }, { "epoch": 0.27, "grad_norm": 4.604153633117676, "learning_rate": 0.00015285714285714287, "loss": 0.4224, "step": 40, "train/kl": 6.080809116363525, "train/logps/chosen": -274.32459677419354, "train/logps/rejected": -291.6558948863636, "train/rewards/chosen": 0.9791939027847782, "train/rewards/margins": 1.6478286295692244, "train/rewards/rejected": -0.668634726784446 }, { "epoch": 0.34, "eval/kl": 7.511639595031738, "eval/logps/chosen": -263.8732394366197, "eval/logps/rejected": -268.6064082278481, "eval/rewards/chosen": 1.9797810299295775, "eval/rewards/margins": 2.403955568071142, "eval/rewards/rejected": -0.42417453814156447, "eval_loss": 0.3916032016277313, "eval_runtime": 141.678, "eval_samples_per_second": 2.117, "eval_steps_per_second": 0.529, "step": 50 }, { "epoch": 0.41, "grad_norm": 5.5113444328308105, "learning_rate": 0.00012428571428571428, "loss": 0.3832, "step": 60, "train/kl": 16.34114646911621, "train/logps/chosen": -240.63917267628204, "train/logps/rejected": -277.463486089939, "train/rewards/chosen": 3.0404166197165465, "train/rewards/margins": 2.3469540618075815, "train/rewards/rejected": 0.6934625579089653 }, { "epoch": 0.55, "grad_norm": 2.7619431018829346, "learning_rate": 9.571428571428573e-05, "loss": 0.3233, "step": 80, "train/kl": 0.7871202826499939, "train/logps/chosen": -294.5624213506711, "train/logps/rejected": -336.1468612938597, "train/rewards/chosen": 0.5479572987396445, "train/rewards/margins": 5.502926202933724, "train/rewards/rejected": -4.954968904194079 }, { "epoch": 0.68, "grad_norm": 4.996425151824951, "learning_rate": 6.714285714285714e-05, "loss": 0.3749, "step": 100, "train/kl": 6.590612888336182, "train/logps/chosen": -265.2372325922819, "train/logps/rejected": -300.5130665204678, "train/rewards/chosen": 2.635832920970533, "train/rewards/margins": 3.7929440163766914, "train/rewards/rejected": -1.1571110954061585 }, { "epoch": 0.68, "eval/kl": 2.193509817123413, "eval/logps/chosen": -268.1729478433099, "eval/logps/rejected": -291.57960838607596, "eval/rewards/chosen": 1.5498130690883583, "eval/rewards/margins": 4.271308299075008, "eval/rewards/rejected": -2.7214952299866497, "eval_loss": 0.35275527834892273, "eval_runtime": 141.6617, "eval_samples_per_second": 2.118, "eval_steps_per_second": 0.529, "step": 100 } ], "logging_steps": 20, "max_steps": 145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }