|
{ |
|
"best_metric": 0.47333332896232605, |
|
"best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.17-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100", |
|
"epoch": 0.684931506849315, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018, |
|
"loss": 0.4638, |
|
"step": 20, |
|
"train/kl": 7.083856105804443, |
|
"train/logps/chosen": -426.40190360915494, |
|
"train/logps/rejected": -550.5845768960675, |
|
"train/rewards/chosen": -14.309082890900088, |
|
"train/rewards/margins": 11.978627234626599, |
|
"train/rewards/rejected": -26.287710125526687 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015142857142857143, |
|
"loss": 0.4844, |
|
"step": 40, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -3204.072177419355, |
|
"train/logps/rejected": -3086.9876893939395, |
|
"train/rewards/chosen": -292.8815524193548, |
|
"train/rewards/margins": -13.51173707844572, |
|
"train/rewards/rejected": -279.3698153409091 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval/kl": 0.0, |
|
"eval/logps/chosen": -2537.9452024647885, |
|
"eval/logps/rejected": -2313.8192246835442, |
|
"eval/rewards/chosen": -225.4286971830986, |
|
"eval/rewards/margins": -20.483273429142884, |
|
"eval/rewards/rejected": -204.9454237539557, |
|
"eval_loss": 0.47333332896232605, |
|
"eval_runtime": 143.3034, |
|
"eval_samples_per_second": 2.093, |
|
"eval_steps_per_second": 0.523, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012285714285714287, |
|
"loss": 0.5, |
|
"step": 60, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -2750.593359375, |
|
"train/logps/rejected": -2645.1216796875, |
|
"train/rewards/chosen": -247.13798828125, |
|
"train/rewards/margins": -11.170458984374989, |
|
"train/rewards/rejected": -235.967529296875 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.428571428571429e-05, |
|
"loss": 0.425, |
|
"step": 80, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -2778.7603400735293, |
|
"train/logps/rejected": -2450.7654551630435, |
|
"train/rewards/chosen": -248.5206801470588, |
|
"train/rewards/margins": -31.70898687260228, |
|
"train/rewards/rejected": -216.81169327445653 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.571428571428571e-05, |
|
"loss": 0.5031, |
|
"step": 100, |
|
"train/kl": 0.0, |
|
"train/logps/chosen": -2629.9400232919256, |
|
"train/logps/rejected": -2546.1786556603774, |
|
"train/rewards/chosen": -234.98452057453417, |
|
"train/rewards/margins": -9.083110393716566, |
|
"train/rewards/rejected": -225.9014101808176 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval/kl": 0.0, |
|
"eval/logps/chosen": -2499.3208626760565, |
|
"eval/logps/rejected": -2280.931566455696, |
|
"eval/rewards/chosen": -221.56628246038733, |
|
"eval/rewards/margins": -19.909649252317706, |
|
"eval/rewards/rejected": -201.65663320806962, |
|
"eval_loss": 0.47333332896232605, |
|
"eval_runtime": 143.2853, |
|
"eval_samples_per_second": 2.094, |
|
"eval_steps_per_second": 0.523, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|