{
"best_metric": 0.47333332896232605,
"best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.15-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
"epoch": 0.684931506849315,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"grad_norm": 0.0,
"learning_rate": 0.0001785714285714286,
"loss": 0.4711,
"step": 20,
"train/kl": 5.843189239501953,
"train/logps/chosen": -1172.703515625,
"train/logps/rejected": -1156.49736328125,
"train/rewards/chosen": -88.494970703125,
"train/rewards/margins": -1.6718872070312614,
"train/rewards/rejected": -86.82308349609374
},
{
"epoch": 0.27,
"grad_norm": 0.0,
"learning_rate": 0.00015000000000000001,
"loss": 0.4437,
"step": 40,
"train/kl": 0.0,
"train/logps/chosen": -2424.414392605634,
"train/logps/rejected": -2336.605688202247,
"train/rewards/chosen": -215.21177651848592,
"train/rewards/margins": -9.581421883654457,
"train/rewards/rejected": -205.63035463483146
},
{
"epoch": 0.34,
"eval/kl": 0.0,
"eval/logps/chosen": -2342.893926056338,
"eval/logps/rejected": -2118.181566455696,
"eval/rewards/chosen": -205.92391065140845,
"eval/rewards/margins": -20.54295238400337,
"eval/rewards/rejected": -185.38095826740508,
"eval_loss": 0.47333332896232605,
"eval_runtime": 140.8471,
"eval_samples_per_second": 2.13,
"eval_steps_per_second": 0.532,
"step": 50
},
{
"epoch": 0.41,
"grad_norm": 0.0,
"learning_rate": 0.00012142857142857143,
"loss": 0.4594,
"step": 60,
"train/kl": 0.0,
"train/logps/chosen": -2388.497661564626,
"train/logps/rejected": -2377.544617052023,
"train/rewards/chosen": -211.77136479591837,
"train/rewards/margins": -3.213425850831669,
"train/rewards/rejected": -208.5579389450867
},
{
"epoch": 0.55,
"grad_norm": 0.0,
"learning_rate": 9.285714285714286e-05,
"loss": 0.4656,
"step": 80,
"train/kl": 0.0,
"train/logps/chosen": -2349.194211409396,
"train/logps/rejected": -2324.7878289473683,
"train/rewards/chosen": -207.3123295931208,
"train/rewards/margins": -3.7029089059863054,
"train/rewards/rejected": -203.6094206871345
},
{
"epoch": 0.68,
"grad_norm": 0.0,
"learning_rate": 6.428571428571429e-05,
"loss": 0.4469,
"step": 100,
"train/kl": 0.0,
"train/logps/chosen": -2737.1844405594406,
"train/logps/rejected": -2257.276836158192,
"train/rewards/chosen": -243.65840799825176,
"train/rewards/margins": -45.40216241209356,
"train/rewards/rejected": -198.2562455861582
},
{
"epoch": 0.68,
"eval/kl": 0.0,
"eval/logps/chosen": -2343.730193661972,
"eval/logps/rejected": -2118.9036787974683,
"eval/rewards/chosen": -206.00756492077466,
"eval/rewards/margins": -20.55440283612276,
"eval/rewards/rejected": -185.4531620846519,
"eval_loss": 0.47333332896232605,
"eval_runtime": 140.7532,
"eval_samples_per_second": 2.131,
"eval_steps_per_second": 0.533,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 145,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}