File size: 3,737 Bytes
78176cd
58e8085
ede5538
78176cd
 
 
 
 
 
 
 
 
ede5538
58e8085
ede5538
78176cd
ede5538
 
 
 
 
 
78176cd
 
 
ede5538
58e8085
ede5538
78176cd
ede5538
 
 
 
 
 
78176cd
 
 
58e8085
ede5538
 
 
 
 
58e8085
ede5538
 
 
78176cd
 
 
 
58e8085
 
ede5538
78176cd
58e8085
ede5538
 
 
 
 
78176cd
 
 
58e8085
 
ede5538
78176cd
58e8085
ede5538
 
 
 
 
78176cd
 
 
58e8085
 
ede5538
78176cd
58e8085
ede5538
 
 
 
 
78176cd
 
 
58e8085
ede5538
 
 
 
 
58e8085
ede5538
 
 
78176cd
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{
  "best_metric": 0.47333332896232605,
  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.17-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": 3.690321445465088,
      "learning_rate": 0.00018,
      "loss": 0.4288,
      "step": 20,
      "train/kl": 4.702511787414551,
      "train/logps/chosen": -287.9634468129139,
      "train/logps/rejected": -294.00746579142015,
      "train/rewards/chosen": -0.4468543450563949,
      "train/rewards/margins": 1.449634002646083,
      "train/rewards/rejected": -1.8964883477024779
    },
    {
      "epoch": 0.27,
      "grad_norm": 9.52605017090491e-09,
      "learning_rate": 0.00015142857142857143,
      "loss": 0.4014,
      "step": 40,
      "train/kl": 11.584417343139648,
      "train/logps/chosen": -482.1775173611111,
      "train/logps/rejected": -527.2658025568181,
      "train/rewards/chosen": -20.685902913411457,
      "train/rewards/margins": 3.630714185310133,
      "train/rewards/rejected": -24.31661709872159
    },
    {
      "epoch": 0.34,
      "eval/kl": 0.0,
      "eval/logps/chosen": -1703.6540492957747,
      "eval/logps/rejected": -1555.5559731012659,
      "eval/rewards/chosen": -141.9996423855634,
      "eval/rewards/margins": -12.881070391892507,
      "eval/rewards/rejected": -129.11857199367088,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 139.1542,
      "eval_samples_per_second": 2.156,
      "eval_steps_per_second": 0.539,
      "step": 50
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.0,
      "learning_rate": 0.00012285714285714287,
      "loss": 0.4781,
      "step": 60,
      "train/kl": 0.0,
      "train/logps/chosen": -1893.7743055555557,
      "train/logps/rejected": -1621.3242889221558,
      "train/rewards/chosen": -159.2800372753268,
      "train/rewards/margins": -25.875120546284876,
      "train/rewards/rejected": -133.40491672904193
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.0,
      "learning_rate": 9.428571428571429e-05,
      "loss": 0.4813,
      "step": 80,
      "train/kl": 0.0,
      "train/logps/chosen": -1972.2258522727273,
      "train/logps/rejected": -1762.839984939759,
      "train/rewards/chosen": -167.9407721185065,
      "train/rewards/margins": -19.929229855705273,
      "train/rewards/rejected": -148.0115422628012
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.0,
      "learning_rate": 6.571428571428571e-05,
      "loss": 0.4875,
      "step": 100,
      "train/kl": 0.0,
      "train/logps/chosen": -1872.451923076923,
      "train/logps/rejected": -1815.126524390244,
      "train/rewards/chosen": -158.9895958533654,
      "train/rewards/margins": -6.588621672725139,
      "train/rewards/rejected": -152.40097418064025
    },
    {
      "epoch": 0.68,
      "eval/kl": 0.0,
      "eval/logps/chosen": -1757.160761443662,
      "eval/logps/rejected": -1608.579509493671,
      "eval/rewards/chosen": -147.35032460387325,
      "eval/rewards/margins": -12.929376720170723,
      "eval/rewards/rejected": -134.42094788370252,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 138.9701,
      "eval_samples_per_second": 2.159,
      "eval_steps_per_second": 0.54,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}