{
  "best_metric": 0.47333332896232605,
  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.15-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": 0.0,
      "learning_rate": 0.0001785714285714286,
      "loss": 0.4711,
      "step": 20,
      "train/kl": 5.843189239501953,
      "train/logps/chosen": -1172.703515625,
      "train/logps/rejected": -1156.49736328125,
      "train/rewards/chosen": -88.494970703125,
      "train/rewards/margins": -1.6718872070312614,
      "train/rewards/rejected": -86.82308349609374
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.0,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.4437,
      "step": 40,
      "train/kl": 0.0,
      "train/logps/chosen": -2424.414392605634,
      "train/logps/rejected": -2336.605688202247,
      "train/rewards/chosen": -215.21177651848592,
      "train/rewards/margins": -9.581421883654457,
      "train/rewards/rejected": -205.63035463483146
    },
    {
      "epoch": 0.34,
      "eval/kl": 0.0,
      "eval/logps/chosen": -2342.893926056338,
      "eval/logps/rejected": -2118.181566455696,
      "eval/rewards/chosen": -205.92391065140845,
      "eval/rewards/margins": -20.54295238400337,
      "eval/rewards/rejected": -185.38095826740508,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 140.8471,
      "eval_samples_per_second": 2.13,
      "eval_steps_per_second": 0.532,
      "step": 50
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.0,
      "learning_rate": 0.00012142857142857143,
      "loss": 0.4594,
      "step": 60,
      "train/kl": 0.0,
      "train/logps/chosen": -2388.497661564626,
      "train/logps/rejected": -2377.544617052023,
      "train/rewards/chosen": -211.77136479591837,
      "train/rewards/margins": -3.213425850831669,
      "train/rewards/rejected": -208.5579389450867
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.0,
      "learning_rate": 9.285714285714286e-05,
      "loss": 0.4656,
      "step": 80,
      "train/kl": 0.0,
      "train/logps/chosen": -2349.194211409396,
      "train/logps/rejected": -2324.7878289473683,
      "train/rewards/chosen": -207.3123295931208,
      "train/rewards/margins": -3.7029089059863054,
      "train/rewards/rejected": -203.6094206871345
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.0,
      "learning_rate": 6.428571428571429e-05,
      "loss": 0.4469,
      "step": 100,
      "train/kl": 0.0,
      "train/logps/chosen": -2737.1844405594406,
      "train/logps/rejected": -2257.276836158192,
      "train/rewards/chosen": -243.65840799825176,
      "train/rewards/margins": -45.40216241209356,
      "train/rewards/rejected": -198.2562455861582
    },
    {
      "epoch": 0.68,
      "eval/kl": 0.0,
      "eval/logps/chosen": -2343.730193661972,
      "eval/logps/rejected": -2118.9036787974683,
      "eval/rewards/chosen": -206.00756492077466,
      "eval/rewards/margins": -20.55440283612276,
      "eval/rewards/rejected": -185.4531620846519,
      "eval_loss": 0.47333332896232605,
      "eval_runtime": 140.7532,
      "eval_samples_per_second": 2.131,
      "eval_steps_per_second": 0.533,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}