|
{ |
|
"best_metric": 0.5443353056907654, |
|
"best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.9-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100", |
|
"epoch": 0.684931506849315, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 4.8340067863464355, |
|
"kl": 27.297277450561523, |
|
"learning_rate": 0.00018, |
|
"logps/chosen": -294.6091003417969, |
|
"logps/rejected": -256.2513732910156, |
|
"loss": 0.4863, |
|
"rewards/chosen": 1.882211446762085, |
|
"rewards/margins": 0.8332540392875671, |
|
"rewards/rejected": 0.9183141589164734, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"kl": 40.49668502807617, |
|
"learning_rate": 0.00015142857142857143, |
|
"logps/chosen": -539.7681884765625, |
|
"logps/rejected": -501.2303161621094, |
|
"loss": 0.4555, |
|
"rewards/chosen": -24.707237243652344, |
|
"rewards/margins": -0.5242304801940918, |
|
"rewards/rejected": -22.829710006713867, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -1786.45947265625, |
|
"eval_logps/rejected": -1616.440185546875, |
|
"eval_loss": 0.5443361401557922, |
|
"eval_rewards/chosen": -148.88607788085938, |
|
"eval_rewards/margins": -20.042972564697266, |
|
"eval_rewards/rejected": -135.36317443847656, |
|
"eval_runtime": 138.4944, |
|
"eval_samples_per_second": 2.166, |
|
"eval_steps_per_second": 0.542, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00012285714285714287, |
|
"logps/chosen": -1740.7340087890625, |
|
"logps/rejected": -1820.6246337890625, |
|
"loss": 0.5427, |
|
"rewards/chosen": -146.22254943847656, |
|
"rewards/margins": 5.4553728103637695, |
|
"rewards/rejected": -151.97308349609375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 9.428571428571429e-05, |
|
"logps/chosen": -1711.9744873046875, |
|
"logps/rejected": -1807.9703369140625, |
|
"loss": 0.5427, |
|
"rewards/chosen": -143.12269592285156, |
|
"rewards/margins": 8.623757362365723, |
|
"rewards/rejected": -150.7401885986328, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 6.571428571428571e-05, |
|
"logps/chosen": -1719.2364501953125, |
|
"logps/rejected": -1687.2681884765625, |
|
"loss": 0.5714, |
|
"rewards/chosen": -144.7313995361328, |
|
"rewards/margins": -4.064985752105713, |
|
"rewards/rejected": -140.0031280517578, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -1605.7635498046875, |
|
"eval_logps/rejected": -1472.8948974609375, |
|
"eval_loss": 0.5443353056907654, |
|
"eval_rewards/chosen": -130.8164825439453, |
|
"eval_rewards/margins": -16.295190811157227, |
|
"eval_rewards/rejected": -121.00863647460938, |
|
"eval_runtime": 138.4851, |
|
"eval_samples_per_second": 2.166, |
|
"eval_steps_per_second": 0.542, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|