|
{ |
|
"best_metric": 0.3754348158836365, |
|
"best_model_checkpoint": "./zephyr/05-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.1-KTO_testing kto dataset during training-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100", |
|
"epoch": 0.684931506849315, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 7.029934406280518, |
|
"kl": 2.2542672157287598, |
|
"learning_rate": 0.00018, |
|
"logps/chosen": -356.6376037597656, |
|
"logps/rejected": -393.9584655761719, |
|
"loss": 0.4417, |
|
"rewards/chosen": -3.5771310329437256, |
|
"rewards/margins": 2.5301473140716553, |
|
"rewards/rejected": -6.391368865966797, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 19.954599380493164, |
|
"kl": 6.632220268249512, |
|
"learning_rate": 0.0001542857142857143, |
|
"logps/chosen": -319.3048400878906, |
|
"logps/rejected": -348.5643615722656, |
|
"loss": 0.4315, |
|
"rewards/chosen": -1.7924585342407227, |
|
"rewards/margins": 1.6441251039505005, |
|
"rewards/rejected": -3.656928539276123, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_kl": 32.934268951416016, |
|
"eval_logps/chosen": -264.7897033691406, |
|
"eval_logps/rejected": -267.6116027832031, |
|
"eval_loss": 0.4092177450656891, |
|
"eval_rewards/chosen": 5.2044806480407715, |
|
"eval_rewards/margins": 2.720277786254883, |
|
"eval_rewards/rejected": 2.5654892921447754, |
|
"eval_runtime": 139.3761, |
|
"eval_samples_per_second": 2.152, |
|
"eval_steps_per_second": 0.538, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 4.9014763832092285, |
|
"kl": 22.45343017578125, |
|
"learning_rate": 0.00012571428571428572, |
|
"logps/chosen": -290.8168029785156, |
|
"logps/rejected": -297.7256774902344, |
|
"loss": 0.4154, |
|
"rewards/chosen": 3.169417381286621, |
|
"rewards/margins": 1.9798123836517334, |
|
"rewards/rejected": 1.3037060499191284, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.188873291015625, |
|
"kl": 15.003771781921387, |
|
"learning_rate": 9.714285714285715e-05, |
|
"logps/chosen": -279.8699951171875, |
|
"logps/rejected": -332.38848876953125, |
|
"loss": 0.3492, |
|
"rewards/chosen": 1.9723680019378662, |
|
"rewards/margins": 4.276235103607178, |
|
"rewards/rejected": -2.454475164413452, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 4.462972640991211, |
|
"kl": 4.606294631958008, |
|
"learning_rate": 6.857142857142858e-05, |
|
"logps/chosen": -278.8154602050781, |
|
"logps/rejected": -356.94451904296875, |
|
"loss": 0.3678, |
|
"rewards/chosen": 1.00811767578125, |
|
"rewards/margins": 5.261899471282959, |
|
"rewards/rejected": -4.226025581359863, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_kl": 1.3235526084899902, |
|
"eval_logps/chosen": -330.0657653808594, |
|
"eval_logps/rejected": -357.2244873046875, |
|
"eval_loss": 0.3754348158836365, |
|
"eval_rewards/chosen": -1.3231240510940552, |
|
"eval_rewards/margins": 5.1890459060668945, |
|
"eval_rewards/rejected": -6.395801067352295, |
|
"eval_runtime": 139.0506, |
|
"eval_samples_per_second": 2.157, |
|
"eval_steps_per_second": 0.539, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|