beamaia's picture
Training in progress, step 100, checkpoint
90cf7fd verified
raw
history blame
No virus
3.56 kB
{
"best_metric": 0.5443353056907654,
"best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.9-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
"epoch": 0.684931506849315,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"grad_norm": 4.8340067863464355,
"kl": 27.297277450561523,
"learning_rate": 0.00018,
"logps/chosen": -294.6091003417969,
"logps/rejected": -256.2513732910156,
"loss": 0.4863,
"rewards/chosen": 1.882211446762085,
"rewards/margins": 0.8332540392875671,
"rewards/rejected": 0.9183141589164734,
"step": 20
},
{
"epoch": 0.27,
"grad_norm": 0.0,
"kl": 40.49668502807617,
"learning_rate": 0.00015142857142857143,
"logps/chosen": -539.7681884765625,
"logps/rejected": -501.2303161621094,
"loss": 0.4555,
"rewards/chosen": -24.707237243652344,
"rewards/margins": -0.5242304801940918,
"rewards/rejected": -22.829710006713867,
"step": 40
},
{
"epoch": 0.34,
"eval_kl": 0.0,
"eval_logps/chosen": -1786.45947265625,
"eval_logps/rejected": -1616.440185546875,
"eval_loss": 0.5443361401557922,
"eval_rewards/chosen": -148.88607788085938,
"eval_rewards/margins": -20.042972564697266,
"eval_rewards/rejected": -135.36317443847656,
"eval_runtime": 138.4944,
"eval_samples_per_second": 2.166,
"eval_steps_per_second": 0.542,
"step": 50
},
{
"epoch": 0.41,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 0.00012285714285714287,
"logps/chosen": -1740.7340087890625,
"logps/rejected": -1820.6246337890625,
"loss": 0.5427,
"rewards/chosen": -146.22254943847656,
"rewards/margins": 5.4553728103637695,
"rewards/rejected": -151.97308349609375,
"step": 60
},
{
"epoch": 0.55,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 9.428571428571429e-05,
"logps/chosen": -1711.9744873046875,
"logps/rejected": -1807.9703369140625,
"loss": 0.5427,
"rewards/chosen": -143.12269592285156,
"rewards/margins": 8.623757362365723,
"rewards/rejected": -150.7401885986328,
"step": 80
},
{
"epoch": 0.68,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 6.571428571428571e-05,
"logps/chosen": -1719.2364501953125,
"logps/rejected": -1687.2681884765625,
"loss": 0.5714,
"rewards/chosen": -144.7313995361328,
"rewards/margins": -4.064985752105713,
"rewards/rejected": -140.0031280517578,
"step": 100
},
{
"epoch": 0.68,
"eval_kl": 0.0,
"eval_logps/chosen": -1605.7635498046875,
"eval_logps/rejected": -1472.8948974609375,
"eval_loss": 0.5443353056907654,
"eval_rewards/chosen": -130.8164825439453,
"eval_rewards/margins": -16.295190811157227,
"eval_rewards/rejected": -121.00863647460938,
"eval_runtime": 138.4851,
"eval_samples_per_second": 2.166,
"eval_steps_per_second": 0.542,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 145,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}