{
"best_metric": 0.47333332896232605,
"best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.17-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
"epoch": 0.684931506849315,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"grad_norm": 3.690321445465088,
"learning_rate": 0.00018,
"loss": 0.4288,
"step": 20,
"train/kl": 4.702511787414551,
"train/logps/chosen": -287.9634468129139,
"train/logps/rejected": -294.00746579142015,
"train/rewards/chosen": -0.4468543450563949,
"train/rewards/margins": 1.449634002646083,
"train/rewards/rejected": -1.8964883477024779
},
{
"epoch": 0.27,
"grad_norm": 9.52605017090491e-09,
"learning_rate": 0.00015142857142857143,
"loss": 0.4014,
"step": 40,
"train/kl": 11.584417343139648,
"train/logps/chosen": -482.1775173611111,
"train/logps/rejected": -527.2658025568181,
"train/rewards/chosen": -20.685902913411457,
"train/rewards/margins": 3.630714185310133,
"train/rewards/rejected": -24.31661709872159
},
{
"epoch": 0.34,
"eval/kl": 0.0,
"eval/logps/chosen": -1703.6540492957747,
"eval/logps/rejected": -1555.5559731012659,
"eval/rewards/chosen": -141.9996423855634,
"eval/rewards/margins": -12.881070391892507,
"eval/rewards/rejected": -129.11857199367088,
"eval_loss": 0.47333332896232605,
"eval_runtime": 139.1542,
"eval_samples_per_second": 2.156,
"eval_steps_per_second": 0.539,
"step": 50
},
{
"epoch": 0.41,
"grad_norm": 0.0,
"learning_rate": 0.00012285714285714287,
"loss": 0.4781,
"step": 60,
"train/kl": 0.0,
"train/logps/chosen": -1893.7743055555557,
"train/logps/rejected": -1621.3242889221558,
"train/rewards/chosen": -159.2800372753268,
"train/rewards/margins": -25.875120546284876,
"train/rewards/rejected": -133.40491672904193
},
{
"epoch": 0.55,
"grad_norm": 0.0,
"learning_rate": 9.428571428571429e-05,
"loss": 0.4813,
"step": 80,
"train/kl": 0.0,
"train/logps/chosen": -1972.2258522727273,
"train/logps/rejected": -1762.839984939759,
"train/rewards/chosen": -167.9407721185065,
"train/rewards/margins": -19.929229855705273,
"train/rewards/rejected": -148.0115422628012
},
{
"epoch": 0.68,
"grad_norm": 0.0,
"learning_rate": 6.571428571428571e-05,
"loss": 0.4875,
"step": 100,
"train/kl": 0.0,
"train/logps/chosen": -1872.451923076923,
"train/logps/rejected": -1815.126524390244,
"train/rewards/chosen": -158.9895958533654,
"train/rewards/margins": -6.588621672725139,
"train/rewards/rejected": -152.40097418064025
},
{
"epoch": 0.68,
"eval/kl": 0.0,
"eval/logps/chosen": -1757.160761443662,
"eval/logps/rejected": -1608.579509493671,
"eval/rewards/chosen": -147.35032460387325,
"eval/rewards/margins": -12.929376720170723,
"eval/rewards/rejected": -134.42094788370252,
"eval_loss": 0.47333332896232605,
"eval_runtime": 138.9701,
"eval_samples_per_second": 2.159,
"eval_steps_per_second": 0.54,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 145,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}