File size: 3,847 Bytes
4ecd0f1
f8c44d0
 
4ecd0f1
 
 
 
 
 
 
 
 
f8c44d0
c7be3b7
f8c44d0
4ecd0f1
f8c44d0
 
 
 
 
 
4ecd0f1
 
 
f8c44d0
c7be3b7
f8c44d0
4ecd0f1
f8c44d0
 
 
 
 
 
4ecd0f1
 
 
f8c44d0
 
 
 
 
 
 
 
 
 
4ecd0f1
 
 
 
f8c44d0
c7be3b7
f8c44d0
4ecd0f1
f8c44d0
 
 
 
 
 
4ecd0f1
 
 
f8c44d0
c7be3b7
f8c44d0
4ecd0f1
f8c44d0
 
 
 
 
 
4ecd0f1
 
 
f8c44d0
c7be3b7
f8c44d0
4ecd0f1
f8c44d0
 
 
 
 
 
4ecd0f1
 
 
f8c44d0
 
 
 
 
 
 
 
 
 
4ecd0f1
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{
  "best_metric": 0.35275527834892273,
  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.18-KTO_Hyperparameter search, altering lora params for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": 6.346695899963379,
      "learning_rate": 0.00018142857142857142,
      "loss": 0.467,
      "step": 20,
      "train/kl": 6.117425918579102,
      "train/logps/chosen": -259.1842447916667,
      "train/logps/rejected": -290.1948988970588,
      "train/rewards/chosen": 0.930299072265625,
      "train/rewards/margins": 0.5634524266860065,
      "train/rewards/rejected": 0.36684664557961855
    },
    {
      "epoch": 0.27,
      "grad_norm": 4.604153633117676,
      "learning_rate": 0.00015285714285714287,
      "loss": 0.4224,
      "step": 40,
      "train/kl": 6.080809116363525,
      "train/logps/chosen": -274.32459677419354,
      "train/logps/rejected": -291.6558948863636,
      "train/rewards/chosen": 0.9791939027847782,
      "train/rewards/margins": 1.6478286295692244,
      "train/rewards/rejected": -0.668634726784446
    },
    {
      "epoch": 0.34,
      "eval/kl": 7.511639595031738,
      "eval/logps/chosen": -263.8732394366197,
      "eval/logps/rejected": -268.6064082278481,
      "eval/rewards/chosen": 1.9797810299295775,
      "eval/rewards/margins": 2.403955568071142,
      "eval/rewards/rejected": -0.42417453814156447,
      "eval_loss": 0.3916032016277313,
      "eval_runtime": 141.678,
      "eval_samples_per_second": 2.117,
      "eval_steps_per_second": 0.529,
      "step": 50
    },
    {
      "epoch": 0.41,
      "grad_norm": 5.5113444328308105,
      "learning_rate": 0.00012428571428571428,
      "loss": 0.3832,
      "step": 60,
      "train/kl": 16.34114646911621,
      "train/logps/chosen": -240.63917267628204,
      "train/logps/rejected": -277.463486089939,
      "train/rewards/chosen": 3.0404166197165465,
      "train/rewards/margins": 2.3469540618075815,
      "train/rewards/rejected": 0.6934625579089653
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.7619431018829346,
      "learning_rate": 9.571428571428573e-05,
      "loss": 0.3233,
      "step": 80,
      "train/kl": 0.7871202826499939,
      "train/logps/chosen": -294.5624213506711,
      "train/logps/rejected": -336.1468612938597,
      "train/rewards/chosen": 0.5479572987396445,
      "train/rewards/margins": 5.502926202933724,
      "train/rewards/rejected": -4.954968904194079
    },
    {
      "epoch": 0.68,
      "grad_norm": 4.996425151824951,
      "learning_rate": 6.714285714285714e-05,
      "loss": 0.3749,
      "step": 100,
      "train/kl": 6.590612888336182,
      "train/logps/chosen": -265.2372325922819,
      "train/logps/rejected": -300.5130665204678,
      "train/rewards/chosen": 2.635832920970533,
      "train/rewards/margins": 3.7929440163766914,
      "train/rewards/rejected": -1.1571110954061585
    },
    {
      "epoch": 0.68,
      "eval/kl": 2.193509817123413,
      "eval/logps/chosen": -268.1729478433099,
      "eval/logps/rejected": -291.57960838607596,
      "eval/rewards/chosen": 1.5498130690883583,
      "eval/rewards/margins": 4.271308299075008,
      "eval/rewards/rejected": -2.7214952299866497,
      "eval_loss": 0.35275527834892273,
      "eval_runtime": 141.6617,
      "eval_samples_per_second": 2.118,
      "eval_steps_per_second": 0.529,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}