|
{ |
|
"best_metric": 0.47333332896232605, |
|
"best_model_checkpoint": "./zephyr/06-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.1-KTO_testing kto dataset during training-3_max_steps-145_batch_16_2024-04-06_ppid_9/checkpoint-100", |
|
"epoch": 0.684931506849315, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 34.550357818603516, |
|
"kl": 40.30364227294922, |
|
"learning_rate": 0.00018571428571428572, |
|
"logps/chosen": -341.70428466796875, |
|
"logps/rejected": -376.15447998046875, |
|
"loss": 0.4341, |
|
"rewards/chosen": 4.142109394073486, |
|
"rewards/margins": 5.559747695922852, |
|
"rewards/rejected": -1.2980353832244873, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"kl": 9.54911994934082, |
|
"learning_rate": 0.00015857142857142857, |
|
"logps/chosen": -2304.16552734375, |
|
"logps/rejected": -2427.63427734375, |
|
"loss": 0.5152, |
|
"rewards/chosen": -194.421875, |
|
"rewards/margins": 7.0421857833862305, |
|
"rewards/rejected": -205.4108428955078, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -3220.12841796875, |
|
"eval_logps/rejected": -2905.742919921875, |
|
"eval_loss": 0.47333332896232605, |
|
"eval_rewards/chosen": -289.121337890625, |
|
"eval_rewards/margins": -35.262752532958984, |
|
"eval_rewards/rejected": -256.6285705566406, |
|
"eval_runtime": 138.045, |
|
"eval_samples_per_second": 2.173, |
|
"eval_steps_per_second": 0.543, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00013000000000000002, |
|
"logps/chosen": -3215.18505859375, |
|
"logps/rejected": -3101.622802734375, |
|
"loss": 0.4344, |
|
"rewards/chosen": -290.7165222167969, |
|
"rewards/margins": -12.484609603881836, |
|
"rewards/rejected": -276.9840393066406, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 0.00010142857142857143, |
|
"logps/chosen": -3785.22607421875, |
|
"logps/rejected": -3411.793701171875, |
|
"loss": 0.4781, |
|
"rewards/chosen": -344.2449951171875, |
|
"rewards/margins": -43.561405181884766, |
|
"rewards/rejected": -306.4319152832031, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 7.285714285714286e-05, |
|
"logps/chosen": -3814.2275390625, |
|
"logps/rejected": -3579.71630859375, |
|
"loss": 0.4344, |
|
"rewards/chosen": -343.0238952636719, |
|
"rewards/margins": -25.151195526123047, |
|
"rewards/rejected": -323.59967041015625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_kl": 0.0, |
|
"eval_logps/chosen": -3431.77734375, |
|
"eval_logps/rejected": -3077.673583984375, |
|
"eval_loss": 0.47333332896232605, |
|
"eval_rewards/chosen": -310.2862854003906, |
|
"eval_rewards/margins": -38.59312438964844, |
|
"eval_rewards/rejected": -273.8216552734375, |
|
"eval_runtime": 138.0016, |
|
"eval_samples_per_second": 2.174, |
|
"eval_steps_per_second": 0.543, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|