jiuhai commited on
Commit
87c25a7
1 Parent(s): ea1eca1

Training in progress, epoch 2

Browse files
README.md CHANGED
@@ -18,15 +18,15 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.6488
22
- - Rewards/chosen: 0.0341
23
- - Rewards/rejected: -0.0820
24
- - Rewards/accuracies: 0.7109
25
- - Rewards/margins: 0.1161
26
- - Logps/rejected: -224.8079
27
- - Logps/chosen: -271.6428
28
- - Logits/rejected: -3.0562
29
- - Logits/chosen: -3.0761
30
 
31
  ## Model description
32
 
@@ -62,12 +62,12 @@ The following hyperparameters were used during training:
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
- | 0.6488 | 1.0 | 485 | 0.6488 | 0.0341 | -0.0820 | 0.7109 | 0.1161 | -224.8079 | -271.6428 | -3.0562 | -3.0761 |
66
 
67
 
68
  ### Framework versions
69
 
70
  - Transformers 4.35.0
71
- - Pytorch 2.1.0+cu121
72
  - Datasets 2.14.6
73
  - Tokenizers 0.14.1
 
18
 
19
  This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.5650
22
+ - Rewards/chosen: 0.0816
23
+ - Rewards/rejected: -0.2564
24
+ - Rewards/accuracies: 0.7695
25
+ - Rewards/margins: 0.3380
26
+ - Logps/rejected: -175.5244
27
+ - Logps/chosen: -271.4002
28
+ - Logits/rejected: -3.0699
29
+ - Logits/chosen: -3.0344
30
 
31
  ## Model description
32
 
 
62
 
63
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
+ | 0.482 | 1.0 | 485 | 0.5650 | 0.0816 | -0.2564 | 0.7695 | 0.3380 | -175.5244 | -271.4002 | -3.0699 | -3.0344 |
66
 
67
 
68
  ### Framework versions
69
 
70
  - Transformers 4.35.0
71
+ - Pytorch 2.1.1+cu121
72
  - Datasets 2.14.6
73
  - Tokenizers 0.14.1
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "q_proj",
20
- "k_proj",
21
  "v_proj",
22
- "o_proj"
 
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
 
19
  "v_proj",
20
+ "o_proj",
21
+ "q_proj",
22
+ "k_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66d77ec56d43708a60162692263571a3ce844cbf97a6b876e051e1f68a3c50a6
3
  size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d078e6f25bcc0098b398028c055fd9e8c6049ebcaaf725b429870480ca5b84c7
3
  size 109086672
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -3.07612943649292,
4
- "eval_logits/rejected": -3.056239604949951,
5
- "eval_logps/chosen": -271.6427917480469,
6
- "eval_logps/rejected": -224.8079376220703,
7
- "eval_loss": 0.6488261818885803,
8
- "eval_rewards/accuracies": 0.7109375,
9
- "eval_rewards/chosen": 0.034067459404468536,
10
- "eval_rewards/margins": 0.11610361933708191,
11
- "eval_rewards/rejected": -0.08203616738319397,
12
- "eval_runtime": 254.1478,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 7.869,
15
  "eval_steps_per_second": 0.063,
16
- "train_loss": 0.6667533972828659,
17
- "train_runtime": 15505.6746,
18
- "train_samples": 61966,
19
- "train_samples_per_second": 3.996,
20
  "train_steps_per_second": 0.031
21
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -3.034407377243042,
4
+ "eval_logits/rejected": -3.069913864135742,
5
+ "eval_logps/chosen": -271.40020751953125,
6
+ "eval_logps/rejected": -175.5244140625,
7
+ "eval_loss": 0.5650191903114319,
8
+ "eval_rewards/accuracies": 0.76953125,
9
+ "eval_rewards/chosen": 0.08157022297382355,
10
+ "eval_rewards/margins": 0.33799096941947937,
11
+ "eval_rewards/rejected": -0.25642073154449463,
12
+ "eval_runtime": 254.1285,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.87,
15
  "eval_steps_per_second": 0.063,
16
+ "train_loss": 0.5539181610972611,
17
+ "train_runtime": 15602.6148,
18
+ "train_samples": 62064,
19
+ "train_samples_per_second": 3.978,
20
  "train_steps_per_second": 0.031
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": -3.07612943649292,
4
- "eval_logits/rejected": -3.056239604949951,
5
- "eval_logps/chosen": -271.6427917480469,
6
- "eval_logps/rejected": -224.8079376220703,
7
- "eval_loss": 0.6488261818885803,
8
- "eval_rewards/accuracies": 0.7109375,
9
- "eval_rewards/chosen": 0.034067459404468536,
10
- "eval_rewards/margins": 0.11610361933708191,
11
- "eval_rewards/rejected": -0.08203616738319397,
12
- "eval_runtime": 254.1478,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 7.869,
15
  "eval_steps_per_second": 0.063
16
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_logits/chosen": -3.034407377243042,
4
+ "eval_logits/rejected": -3.069913864135742,
5
+ "eval_logps/chosen": -271.40020751953125,
6
+ "eval_logps/rejected": -175.5244140625,
7
+ "eval_loss": 0.5650191903114319,
8
+ "eval_rewards/accuracies": 0.76953125,
9
+ "eval_rewards/chosen": 0.08157022297382355,
10
+ "eval_rewards/margins": 0.33799096941947937,
11
+ "eval_rewards/rejected": -0.25642073154449463,
12
+ "eval_runtime": 254.1285,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.87,
15
  "eval_steps_per_second": 0.063
16
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6667533972828659,
4
- "train_runtime": 15505.6746,
5
- "train_samples": 61966,
6
- "train_samples_per_second": 3.996,
7
  "train_steps_per_second": 0.031
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5539181610972611,
4
+ "train_runtime": 15602.6148,
5
+ "train_samples": 62064,
6
+ "train_samples_per_second": 3.978,
7
  "train_steps_per_second": 0.031
8
  }
trainer_state.json CHANGED
@@ -11,10 +11,10 @@
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.020408163265306e-08,
14
- "logits/chosen": -2.891636610031128,
15
- "logits/rejected": -2.8851490020751953,
16
- "logps/chosen": -135.91143798828125,
17
- "logps/rejected": -101.67433166503906,
18
  "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
@@ -25,698 +25,698 @@
25
  {
26
  "epoch": 0.02,
27
  "learning_rate": 1.0204081632653061e-07,
28
- "logits/chosen": -2.9867801666259766,
29
- "logits/rejected": -3.007345199584961,
30
- "logps/chosen": -309.9524230957031,
31
- "logps/rejected": -272.5204162597656,
32
- "loss": 0.6926,
33
- "rewards/accuracies": 0.4444444477558136,
34
- "rewards/chosen": -0.00014034591731615365,
35
- "rewards/margins": 0.0023386774118989706,
36
- "rewards/rejected": -0.002479023300111294,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.04,
41
  "learning_rate": 2.0408163265306121e-07,
42
- "logits/chosen": -3.0750911235809326,
43
- "logits/rejected": -3.0683979988098145,
44
- "logps/chosen": -282.82012939453125,
45
- "logps/rejected": -249.6508331298828,
46
- "loss": 0.6922,
47
- "rewards/accuracies": 0.48124998807907104,
48
- "rewards/chosen": -0.0003494807460810989,
49
- "rewards/margins": 0.0025773285888135433,
50
- "rewards/rejected": -0.002926809247583151,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.06,
55
  "learning_rate": 3.0612244897959183e-07,
56
- "logits/chosen": -3.072047710418701,
57
- "logits/rejected": -3.0354340076446533,
58
- "logps/chosen": -280.91827392578125,
59
- "logps/rejected": -199.9836883544922,
60
- "loss": 0.6943,
61
- "rewards/accuracies": 0.518750011920929,
62
- "rewards/chosen": 0.0013565481640398502,
63
- "rewards/margins": -0.0006766369333490729,
64
- "rewards/rejected": 0.0020331847481429577,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.08,
69
  "learning_rate": 4.0816326530612243e-07,
70
- "logits/chosen": -3.0383801460266113,
71
- "logits/rejected": -3.035770893096924,
72
- "logps/chosen": -290.04510498046875,
73
- "logps/rejected": -238.2515106201172,
74
- "loss": 0.6927,
75
- "rewards/accuracies": 0.4625000059604645,
76
- "rewards/chosen": 0.0009484182810410857,
77
- "rewards/margins": 0.002096892800182104,
78
- "rewards/rejected": -0.001148474169895053,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.1,
83
  "learning_rate": 4.988532110091743e-07,
84
- "logits/chosen": -3.037513017654419,
85
- "logits/rejected": -3.0122437477111816,
86
- "logps/chosen": -296.66009521484375,
87
- "logps/rejected": -217.6807861328125,
88
- "loss": 0.6896,
89
- "rewards/accuracies": 0.550000011920929,
90
- "rewards/chosen": 0.006595917046070099,
91
- "rewards/margins": 0.010886356234550476,
92
- "rewards/rejected": -0.004290440119802952,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.12,
97
  "learning_rate": 4.873853211009174e-07,
98
- "logits/chosen": -3.0581448078155518,
99
- "logits/rejected": -3.0058109760284424,
100
- "logps/chosen": -297.3258361816406,
101
- "logps/rejected": -242.928466796875,
102
- "loss": 0.6895,
103
- "rewards/accuracies": 0.53125,
104
- "rewards/chosen": 0.0021637417376041412,
105
- "rewards/margins": 0.0060836682096123695,
106
- "rewards/rejected": -0.003919926937669516,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.14,
111
  "learning_rate": 4.7591743119266054e-07,
112
- "logits/chosen": -3.0567173957824707,
113
- "logits/rejected": -3.056859254837036,
114
- "logps/chosen": -281.0410461425781,
115
- "logps/rejected": -256.200927734375,
116
- "loss": 0.6886,
117
- "rewards/accuracies": 0.574999988079071,
118
- "rewards/chosen": 0.006569950375705957,
119
- "rewards/margins": 0.011877616867423058,
120
- "rewards/rejected": -0.005307666026055813,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.16,
125
  "learning_rate": 4.644495412844037e-07,
126
- "logits/chosen": -3.0421910285949707,
127
- "logits/rejected": -3.0536134243011475,
128
- "logps/chosen": -299.5488586425781,
129
- "logps/rejected": -233.7873077392578,
130
- "loss": 0.6871,
131
- "rewards/accuracies": 0.518750011920929,
132
- "rewards/chosen": 0.004993592854589224,
133
- "rewards/margins": 0.014339953660964966,
134
- "rewards/rejected": -0.00934636127203703,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.19,
139
  "learning_rate": 4.5298165137614677e-07,
140
- "logits/chosen": -3.029874324798584,
141
- "logits/rejected": -3.018759250640869,
142
- "logps/chosen": -253.44351196289062,
143
- "logps/rejected": -201.76646423339844,
144
- "loss": 0.6854,
145
- "rewards/accuracies": 0.581250011920929,
146
- "rewards/chosen": 0.003981114365160465,
147
- "rewards/margins": 0.012469857931137085,
148
- "rewards/rejected": -0.008488742634654045,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.21,
153
  "learning_rate": 4.4151376146788986e-07,
154
- "logits/chosen": -3.0842769145965576,
155
- "logits/rejected": -3.086177349090576,
156
- "logps/chosen": -258.80426025390625,
157
- "logps/rejected": -233.66793823242188,
158
- "loss": 0.6863,
159
- "rewards/accuracies": 0.6000000238418579,
160
- "rewards/chosen": 0.004184984136372805,
161
- "rewards/margins": 0.017948109656572342,
162
- "rewards/rejected": -0.01376312505453825,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 0.23,
167
  "learning_rate": 4.30045871559633e-07,
168
- "logits/chosen": -3.050504207611084,
169
- "logits/rejected": -3.0392251014709473,
170
- "logps/chosen": -275.3699035644531,
171
- "logps/rejected": -228.40451049804688,
172
- "loss": 0.6809,
173
- "rewards/accuracies": 0.637499988079071,
174
- "rewards/chosen": 0.007297619245946407,
175
- "rewards/margins": 0.02423209697008133,
176
- "rewards/rejected": -0.016934476792812347,
177
  "step": 110
178
  },
179
  {
180
  "epoch": 0.25,
181
  "learning_rate": 4.1857798165137613e-07,
182
- "logits/chosen": -3.0525004863739014,
183
- "logits/rejected": -3.024714946746826,
184
- "logps/chosen": -295.898681640625,
185
- "logps/rejected": -199.49343872070312,
186
- "loss": 0.6804,
187
- "rewards/accuracies": 0.5874999761581421,
188
- "rewards/chosen": 0.007169491611421108,
189
- "rewards/margins": 0.02695578895509243,
190
- "rewards/rejected": -0.019786298274993896,
191
  "step": 120
192
  },
193
  {
194
  "epoch": 0.27,
195
  "learning_rate": 4.071100917431192e-07,
196
- "logits/chosen": -3.026671886444092,
197
- "logits/rejected": -3.028925657272339,
198
- "logps/chosen": -268.51568603515625,
199
- "logps/rejected": -220.26260375976562,
200
- "loss": 0.6796,
201
- "rewards/accuracies": 0.6812499761581421,
202
- "rewards/chosen": 0.01651640608906746,
203
- "rewards/margins": 0.03892368823289871,
204
- "rewards/rejected": -0.022407282143831253,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 0.29,
209
  "learning_rate": 3.9564220183486236e-07,
210
- "logits/chosen": -3.0486905574798584,
211
- "logits/rejected": -3.038287878036499,
212
- "logps/chosen": -287.53082275390625,
213
- "logps/rejected": -222.7250213623047,
214
- "loss": 0.6761,
215
- "rewards/accuracies": 0.625,
216
- "rewards/chosen": 0.015277748927474022,
217
- "rewards/margins": 0.03703855723142624,
218
- "rewards/rejected": -0.021760808303952217,
219
  "step": 140
220
  },
221
  {
222
  "epoch": 0.31,
223
  "learning_rate": 3.841743119266055e-07,
224
- "logits/chosen": -3.0499072074890137,
225
- "logits/rejected": -2.9781885147094727,
226
- "logps/chosen": -259.89739990234375,
227
- "logps/rejected": -237.8246307373047,
228
- "loss": 0.6766,
229
- "rewards/accuracies": 0.606249988079071,
230
- "rewards/chosen": 0.008082658052444458,
231
- "rewards/margins": 0.022740600630640984,
232
- "rewards/rejected": -0.014657942578196526,
233
  "step": 150
234
  },
235
  {
236
  "epoch": 0.33,
237
  "learning_rate": 3.7270642201834864e-07,
238
- "logits/chosen": -3.0514883995056152,
239
- "logits/rejected": -3.022423505783081,
240
- "logps/chosen": -309.00494384765625,
241
- "logps/rejected": -228.82583618164062,
242
- "loss": 0.6761,
243
- "rewards/accuracies": 0.6812499761581421,
244
- "rewards/chosen": 0.011194340884685516,
245
- "rewards/margins": 0.04575566574931145,
246
- "rewards/rejected": -0.03456132486462593,
247
  "step": 160
248
  },
249
  {
250
  "epoch": 0.35,
251
  "learning_rate": 3.612385321100918e-07,
252
- "logits/chosen": -3.0084691047668457,
253
- "logits/rejected": -3.011247158050537,
254
- "logps/chosen": -300.25762939453125,
255
- "logps/rejected": -233.0517120361328,
256
- "loss": 0.6723,
257
- "rewards/accuracies": 0.65625,
258
- "rewards/chosen": 0.017441127449274063,
259
- "rewards/margins": 0.04596921056509018,
260
- "rewards/rejected": -0.028528084978461266,
261
  "step": 170
262
  },
263
  {
264
  "epoch": 0.37,
265
  "learning_rate": 3.497706422018348e-07,
266
- "logits/chosen": -2.998293161392212,
267
- "logits/rejected": -3.03139591217041,
268
- "logps/chosen": -299.2662658691406,
269
- "logps/rejected": -239.52804565429688,
270
- "loss": 0.6677,
271
- "rewards/accuracies": 0.643750011920929,
272
- "rewards/chosen": 0.02176077291369438,
273
- "rewards/margins": 0.04461668059229851,
274
- "rewards/rejected": -0.022855903953313828,
275
  "step": 180
276
  },
277
  {
278
  "epoch": 0.39,
279
  "learning_rate": 3.3830275229357795e-07,
280
- "logits/chosen": -3.0845465660095215,
281
- "logits/rejected": -3.0398240089416504,
282
- "logps/chosen": -292.7340087890625,
283
- "logps/rejected": -236.06533813476562,
284
- "loss": 0.671,
285
- "rewards/accuracies": 0.7124999761581421,
286
- "rewards/chosen": 0.026021122932434082,
287
- "rewards/margins": 0.05270993709564209,
288
- "rewards/rejected": -0.02668880857527256,
289
  "step": 190
290
  },
291
  {
292
  "epoch": 0.41,
293
  "learning_rate": 3.268348623853211e-07,
294
- "logits/chosen": -3.0351808071136475,
295
- "logits/rejected": -3.048321485519409,
296
- "logps/chosen": -269.8604736328125,
297
- "logps/rejected": -221.87197875976562,
298
- "loss": 0.6699,
299
- "rewards/accuracies": 0.65625,
300
- "rewards/chosen": 0.023518767207860947,
301
- "rewards/margins": 0.06251207739114761,
302
- "rewards/rejected": -0.03899329900741577,
303
  "step": 200
304
  },
305
  {
306
  "epoch": 0.43,
307
  "learning_rate": 3.1536697247706423e-07,
308
- "logits/chosen": -3.0768158435821533,
309
- "logits/rejected": -3.083721160888672,
310
- "logps/chosen": -282.7914733886719,
311
- "logps/rejected": -258.88677978515625,
312
- "loss": 0.6694,
313
- "rewards/accuracies": 0.65625,
314
- "rewards/chosen": 0.017539020627737045,
315
- "rewards/margins": 0.06356575340032578,
316
- "rewards/rejected": -0.04602673649787903,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 0.45,
321
  "learning_rate": 3.038990825688073e-07,
322
- "logits/chosen": -3.031602621078491,
323
- "logits/rejected": -3.0251471996307373,
324
- "logps/chosen": -291.6885681152344,
325
- "logps/rejected": -229.2044219970703,
326
- "loss": 0.6661,
327
- "rewards/accuracies": 0.6875,
328
- "rewards/chosen": 0.019487539306282997,
329
- "rewards/margins": 0.06684577465057373,
330
- "rewards/rejected": -0.047358229756355286,
331
  "step": 220
332
  },
333
  {
334
  "epoch": 0.47,
335
  "learning_rate": 2.9243119266055045e-07,
336
- "logits/chosen": -3.0594446659088135,
337
- "logits/rejected": -3.0538389682769775,
338
- "logps/chosen": -296.71978759765625,
339
- "logps/rejected": -232.9663543701172,
340
- "loss": 0.6672,
341
- "rewards/accuracies": 0.6625000238418579,
342
- "rewards/chosen": 0.012631967663764954,
343
- "rewards/margins": 0.05379491299390793,
344
- "rewards/rejected": -0.04116294905543327,
345
  "step": 230
346
  },
347
  {
348
  "epoch": 0.49,
349
  "learning_rate": 2.809633027522936e-07,
350
- "logits/chosen": -3.009617805480957,
351
- "logits/rejected": -3.0026957988739014,
352
- "logps/chosen": -244.1639862060547,
353
- "logps/rejected": -207.7158203125,
354
- "loss": 0.6633,
355
- "rewards/accuracies": 0.675000011920929,
356
- "rewards/chosen": 0.01613594964146614,
357
- "rewards/margins": 0.0652671605348587,
358
- "rewards/rejected": -0.049131207168102264,
359
  "step": 240
360
  },
361
  {
362
  "epoch": 0.52,
363
  "learning_rate": 2.6949541284403673e-07,
364
- "logits/chosen": -3.0107674598693848,
365
- "logits/rejected": -3.012376308441162,
366
- "logps/chosen": -287.5134582519531,
367
- "logps/rejected": -248.5124053955078,
368
- "loss": 0.6593,
369
- "rewards/accuracies": 0.6625000238418579,
370
- "rewards/chosen": 0.017785798758268356,
371
- "rewards/margins": 0.0654246062040329,
372
- "rewards/rejected": -0.04763881862163544,
373
  "step": 250
374
  },
375
  {
376
  "epoch": 0.54,
377
  "learning_rate": 2.5802752293577976e-07,
378
- "logits/chosen": -3.014228105545044,
379
- "logits/rejected": -2.980214834213257,
380
- "logps/chosen": -297.2572021484375,
381
- "logps/rejected": -246.603515625,
382
- "loss": 0.6592,
383
- "rewards/accuracies": 0.699999988079071,
384
- "rewards/chosen": 0.016676222905516624,
385
- "rewards/margins": 0.07184126228094101,
386
- "rewards/rejected": -0.055165041238069534,
387
  "step": 260
388
  },
389
  {
390
  "epoch": 0.56,
391
  "learning_rate": 2.465596330275229e-07,
392
- "logits/chosen": -3.016359329223633,
393
- "logits/rejected": -3.0183348655700684,
394
- "logps/chosen": -261.7985534667969,
395
- "logps/rejected": -230.5518341064453,
396
- "loss": 0.6631,
397
- "rewards/accuracies": 0.6499999761581421,
398
- "rewards/chosen": 0.03012824058532715,
399
- "rewards/margins": 0.07465063035488129,
400
- "rewards/rejected": -0.04452239349484444,
401
  "step": 270
402
  },
403
  {
404
  "epoch": 0.58,
405
  "learning_rate": 2.3509174311926604e-07,
406
- "logits/chosen": -3.062016487121582,
407
- "logits/rejected": -3.08595871925354,
408
- "logps/chosen": -271.8788757324219,
409
- "logps/rejected": -242.447509765625,
410
- "loss": 0.6615,
411
- "rewards/accuracies": 0.675000011920929,
412
- "rewards/chosen": 0.03031134605407715,
413
- "rewards/margins": 0.06919924914836884,
414
- "rewards/rejected": -0.03888789564371109,
415
  "step": 280
416
  },
417
  {
418
  "epoch": 0.6,
419
  "learning_rate": 2.2362385321100916e-07,
420
- "logits/chosen": -3.065378427505493,
421
- "logits/rejected": -3.067957639694214,
422
- "logps/chosen": -293.88592529296875,
423
- "logps/rejected": -247.717529296875,
424
- "loss": 0.6595,
425
- "rewards/accuracies": 0.6000000238418579,
426
- "rewards/chosen": 0.018432429060339928,
427
- "rewards/margins": 0.06363337486982346,
428
- "rewards/rejected": -0.04520093649625778,
429
  "step": 290
430
  },
431
  {
432
  "epoch": 0.62,
433
  "learning_rate": 2.121559633027523e-07,
434
- "logits/chosen": -3.0399653911590576,
435
- "logits/rejected": -3.050255298614502,
436
- "logps/chosen": -248.15798950195312,
437
- "logps/rejected": -231.6765594482422,
438
- "loss": 0.6583,
439
- "rewards/accuracies": 0.7124999761581421,
440
- "rewards/chosen": 0.011154914274811745,
441
- "rewards/margins": 0.07162971049547195,
442
- "rewards/rejected": -0.060474805533885956,
443
  "step": 300
444
  },
445
  {
446
  "epoch": 0.64,
447
  "learning_rate": 2.0068807339449538e-07,
448
- "logits/chosen": -3.0448567867279053,
449
- "logits/rejected": -3.0284125804901123,
450
- "logps/chosen": -259.03173828125,
451
- "logps/rejected": -213.7626190185547,
452
- "loss": 0.6551,
453
- "rewards/accuracies": 0.706250011920929,
454
- "rewards/chosen": 0.03144986182451248,
455
- "rewards/margins": 0.09296337515115738,
456
- "rewards/rejected": -0.061513520777225494,
457
  "step": 310
458
  },
459
  {
460
  "epoch": 0.66,
461
  "learning_rate": 1.8922018348623852e-07,
462
- "logits/chosen": -3.069314479827881,
463
- "logits/rejected": -3.0522797107696533,
464
- "logps/chosen": -247.6428680419922,
465
- "logps/rejected": -224.86416625976562,
466
- "loss": 0.6537,
467
- "rewards/accuracies": 0.6875,
468
- "rewards/chosen": 0.03007657267153263,
469
- "rewards/margins": 0.09640363603830338,
470
- "rewards/rejected": -0.0663270577788353,
471
  "step": 320
472
  },
473
  {
474
  "epoch": 0.68,
475
  "learning_rate": 1.7775229357798163e-07,
476
- "logits/chosen": -3.0534980297088623,
477
- "logits/rejected": -3.0750725269317627,
478
- "logps/chosen": -292.9278564453125,
479
- "logps/rejected": -239.49560546875,
480
- "loss": 0.654,
481
- "rewards/accuracies": 0.6812499761581421,
482
- "rewards/chosen": 0.03384874761104584,
483
- "rewards/margins": 0.09248127043247223,
484
- "rewards/rejected": -0.05863253027200699,
485
  "step": 330
486
  },
487
  {
488
  "epoch": 0.7,
489
  "learning_rate": 1.6628440366972477e-07,
490
- "logits/chosen": -3.0116381645202637,
491
- "logits/rejected": -3.012748956680298,
492
- "logps/chosen": -310.517822265625,
493
- "logps/rejected": -256.17578125,
494
- "loss": 0.6583,
495
- "rewards/accuracies": 0.65625,
496
- "rewards/chosen": 0.023617586120963097,
497
- "rewards/margins": 0.07958104461431503,
498
- "rewards/rejected": -0.055963464081287384,
499
  "step": 340
500
  },
501
  {
502
  "epoch": 0.72,
503
  "learning_rate": 1.5481651376146786e-07,
504
- "logits/chosen": -3.015288829803467,
505
- "logits/rejected": -3.035534381866455,
506
- "logps/chosen": -238.67788696289062,
507
- "logps/rejected": -216.6863250732422,
508
- "loss": 0.6575,
509
- "rewards/accuracies": 0.6812499761581421,
510
- "rewards/chosen": 0.016075262799859047,
511
- "rewards/margins": 0.07468070089817047,
512
- "rewards/rejected": -0.05860542505979538,
513
  "step": 350
514
  },
515
  {
516
  "epoch": 0.74,
517
  "learning_rate": 1.43348623853211e-07,
518
- "logits/chosen": -2.999647855758667,
519
- "logits/rejected": -2.999812602996826,
520
- "logps/chosen": -260.34814453125,
521
- "logps/rejected": -228.0465545654297,
522
- "loss": 0.6576,
523
- "rewards/accuracies": 0.6937500238418579,
524
- "rewards/chosen": 0.010297578759491444,
525
- "rewards/margins": 0.07942849397659302,
526
- "rewards/rejected": -0.06913091242313385,
527
  "step": 360
528
  },
529
  {
530
  "epoch": 0.76,
531
  "learning_rate": 1.318807339449541e-07,
532
- "logits/chosen": -3.029534101486206,
533
- "logits/rejected": -3.0314173698425293,
534
- "logps/chosen": -284.08721923828125,
535
- "logps/rejected": -248.7538604736328,
536
- "loss": 0.6532,
537
- "rewards/accuracies": 0.7250000238418579,
538
- "rewards/chosen": 0.036643363535404205,
539
- "rewards/margins": 0.10196901857852936,
540
- "rewards/rejected": -0.06532564014196396,
541
  "step": 370
542
  },
543
  {
544
  "epoch": 0.78,
545
  "learning_rate": 1.2041284403669725e-07,
546
- "logits/chosen": -3.0199809074401855,
547
- "logits/rejected": -3.012413501739502,
548
- "logps/chosen": -237.28573608398438,
549
- "logps/rejected": -243.95590209960938,
550
- "loss": 0.6538,
551
- "rewards/accuracies": 0.625,
552
- "rewards/chosen": 0.012126882560551167,
553
- "rewards/margins": 0.08107715100049973,
554
- "rewards/rejected": -0.06895027309656143,
555
  "step": 380
556
  },
557
  {
558
  "epoch": 0.8,
559
  "learning_rate": 1.0894495412844036e-07,
560
- "logits/chosen": -2.9979848861694336,
561
- "logits/rejected": -3.015382766723633,
562
- "logps/chosen": -312.633544921875,
563
- "logps/rejected": -235.29806518554688,
564
- "loss": 0.6503,
565
- "rewards/accuracies": 0.6875,
566
- "rewards/chosen": 0.013257297687232494,
567
- "rewards/margins": 0.09175875037908554,
568
- "rewards/rejected": -0.07850147038698196,
569
  "step": 390
570
  },
571
  {
572
  "epoch": 0.82,
573
  "learning_rate": 9.747706422018348e-08,
574
- "logits/chosen": -3.048086166381836,
575
- "logits/rejected": -3.0504872798919678,
576
- "logps/chosen": -278.25189208984375,
577
- "logps/rejected": -248.26510620117188,
578
- "loss": 0.6511,
579
- "rewards/accuracies": 0.6625000238418579,
580
- "rewards/chosen": 0.01932488940656185,
581
- "rewards/margins": 0.07586248964071274,
582
- "rewards/rejected": -0.05653759837150574,
583
  "step": 400
584
  },
585
  {
586
  "epoch": 0.85,
587
  "learning_rate": 8.60091743119266e-08,
588
- "logits/chosen": -3.0442748069763184,
589
- "logits/rejected": -3.0469086170196533,
590
- "logps/chosen": -291.3175048828125,
591
- "logps/rejected": -228.79153442382812,
592
- "loss": 0.6492,
593
- "rewards/accuracies": 0.768750011920929,
594
- "rewards/chosen": 0.026980062946677208,
595
- "rewards/margins": 0.10874257236719131,
596
- "rewards/rejected": -0.08176250755786896,
597
  "step": 410
598
  },
599
  {
600
  "epoch": 0.87,
601
  "learning_rate": 7.454128440366971e-08,
602
- "logits/chosen": -3.0467171669006348,
603
- "logits/rejected": -3.0507471561431885,
604
- "logps/chosen": -274.8234558105469,
605
- "logps/rejected": -227.38638305664062,
606
- "loss": 0.6509,
607
- "rewards/accuracies": 0.6812499761581421,
608
- "rewards/chosen": 0.021679330617189407,
609
- "rewards/margins": 0.10154237598180771,
610
- "rewards/rejected": -0.0798630565404892,
611
  "step": 420
612
  },
613
  {
614
  "epoch": 0.89,
615
  "learning_rate": 6.307339449541284e-08,
616
- "logits/chosen": -3.0327906608581543,
617
- "logits/rejected": -3.0403802394866943,
618
- "logps/chosen": -256.6832580566406,
619
- "logps/rejected": -259.84295654296875,
620
- "loss": 0.651,
621
- "rewards/accuracies": 0.6937500238418579,
622
- "rewards/chosen": 0.014505205675959587,
623
- "rewards/margins": 0.08562619239091873,
624
- "rewards/rejected": -0.07112099230289459,
625
  "step": 430
626
  },
627
  {
628
  "epoch": 0.91,
629
  "learning_rate": 5.1605504587155966e-08,
630
- "logits/chosen": -3.0034422874450684,
631
- "logits/rejected": -3.013791799545288,
632
- "logps/chosen": -301.6542053222656,
633
- "logps/rejected": -234.38381958007812,
634
- "loss": 0.6493,
635
- "rewards/accuracies": 0.699999988079071,
636
- "rewards/chosen": 0.01893182098865509,
637
- "rewards/margins": 0.12277624756097794,
638
- "rewards/rejected": -0.10384440422058105,
639
  "step": 440
640
  },
641
  {
642
  "epoch": 0.93,
643
  "learning_rate": 4.0137614678899086e-08,
644
- "logits/chosen": -3.0094974040985107,
645
- "logits/rejected": -2.9743194580078125,
646
- "logps/chosen": -290.7796630859375,
647
- "logps/rejected": -238.8968963623047,
648
- "loss": 0.6521,
649
- "rewards/accuracies": 0.699999988079071,
650
- "rewards/chosen": 0.029885241761803627,
651
- "rewards/margins": 0.11550422757863998,
652
- "rewards/rejected": -0.08561898022890091,
653
  "step": 450
654
  },
655
  {
656
  "epoch": 0.95,
657
  "learning_rate": 2.86697247706422e-08,
658
- "logits/chosen": -3.043740749359131,
659
- "logits/rejected": -3.035067081451416,
660
- "logps/chosen": -258.144287109375,
661
- "logps/rejected": -234.55081176757812,
662
- "loss": 0.6483,
663
- "rewards/accuracies": 0.6812499761581421,
664
- "rewards/chosen": 0.01103687472641468,
665
- "rewards/margins": 0.0873931497335434,
666
- "rewards/rejected": -0.07635627686977386,
667
  "step": 460
668
  },
669
  {
670
  "epoch": 0.97,
671
  "learning_rate": 1.720183486238532e-08,
672
- "logits/chosen": -3.0199711322784424,
673
- "logits/rejected": -3.026895761489868,
674
- "logps/chosen": -288.91119384765625,
675
- "logps/rejected": -242.3592071533203,
676
- "loss": 0.6511,
677
- "rewards/accuracies": 0.6812499761581421,
678
- "rewards/chosen": 0.03277132660150528,
679
- "rewards/margins": 0.10831280797719955,
680
- "rewards/rejected": -0.07554147392511368,
681
  "step": 470
682
  },
683
  {
684
  "epoch": 0.99,
685
  "learning_rate": 5.73394495412844e-09,
686
- "logits/chosen": -3.041350841522217,
687
- "logits/rejected": -3.058216094970703,
688
- "logps/chosen": -258.9853820800781,
689
- "logps/rejected": -226.7718048095703,
690
- "loss": 0.6488,
691
- "rewards/accuracies": 0.6875,
692
- "rewards/chosen": 0.022982869297266006,
693
- "rewards/margins": 0.09263849258422852,
694
- "rewards/rejected": -0.06965561956167221,
695
  "step": 480
696
  },
697
  {
698
  "epoch": 1.0,
699
- "eval_logits/chosen": -3.07612943649292,
700
- "eval_logits/rejected": -3.056239604949951,
701
- "eval_logps/chosen": -271.6427917480469,
702
- "eval_logps/rejected": -224.8079376220703,
703
- "eval_loss": 0.6488261818885803,
704
- "eval_rewards/accuracies": 0.7109375,
705
- "eval_rewards/chosen": 0.034067459404468536,
706
- "eval_rewards/margins": 0.11610361933708191,
707
- "eval_rewards/rejected": -0.08203616738319397,
708
- "eval_runtime": 255.1726,
709
- "eval_samples_per_second": 7.838,
710
- "eval_steps_per_second": 0.063,
711
  "step": 485
712
  },
713
  {
714
  "epoch": 1.0,
715
  "step": 485,
716
  "total_flos": 0.0,
717
- "train_loss": 0.6667533972828659,
718
- "train_runtime": 15505.6746,
719
- "train_samples_per_second": 3.996,
720
  "train_steps_per_second": 0.031
721
  }
722
  ],
 
11
  {
12
  "epoch": 0.0,
13
  "learning_rate": 1.020408163265306e-08,
14
+ "logits/chosen": -3.094454526901245,
15
+ "logits/rejected": -3.0498220920562744,
16
+ "logps/chosen": -242.99183654785156,
17
+ "logps/rejected": -74.66817474365234,
18
  "loss": 0.6931,
19
  "rewards/accuracies": 0.0,
20
  "rewards/chosen": 0.0,
 
25
  {
26
  "epoch": 0.02,
27
  "learning_rate": 1.0204081632653061e-07,
28
+ "logits/chosen": -3.032047986984253,
29
+ "logits/rejected": -3.029446840286255,
30
+ "logps/chosen": -290.1824645996094,
31
+ "logps/rejected": -75.82839965820312,
32
+ "loss": 0.6935,
33
+ "rewards/accuracies": 0.4027777910232544,
34
+ "rewards/chosen": -0.007104851305484772,
35
+ "rewards/margins": -0.0044839149340987206,
36
+ "rewards/rejected": -0.0026209354400634766,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.04,
41
  "learning_rate": 2.0408163265306121e-07,
42
+ "logits/chosen": -2.9773757457733154,
43
+ "logits/rejected": -2.967517852783203,
44
+ "logps/chosen": -297.57342529296875,
45
+ "logps/rejected": -77.62318420410156,
46
+ "loss": 0.692,
47
+ "rewards/accuracies": 0.5625,
48
+ "rewards/chosen": 0.00020697650325018913,
49
+ "rewards/margins": 0.003021990181878209,
50
+ "rewards/rejected": -0.0028150142170488834,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.06,
55
  "learning_rate": 3.0612244897959183e-07,
56
+ "logits/chosen": -2.983607769012451,
57
+ "logits/rejected": -2.9363152980804443,
58
+ "logps/chosen": -288.51458740234375,
59
+ "logps/rejected": -75.65086364746094,
60
+ "loss": 0.6892,
61
+ "rewards/accuracies": 0.5687500238418579,
62
+ "rewards/chosen": -0.0037677965592592955,
63
+ "rewards/margins": 0.004846884869039059,
64
+ "rewards/rejected": -0.008614679798483849,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.08,
69
  "learning_rate": 4.0816326530612243e-07,
70
+ "logits/chosen": -3.0467514991760254,
71
+ "logits/rejected": -3.010239362716675,
72
+ "logps/chosen": -243.7971954345703,
73
+ "logps/rejected": -81.06056213378906,
74
+ "loss": 0.685,
75
+ "rewards/accuracies": 0.6499999761581421,
76
+ "rewards/chosen": 0.0063628097996115685,
77
+ "rewards/margins": 0.02118637040257454,
78
+ "rewards/rejected": -0.014823561534285545,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.1,
83
  "learning_rate": 4.988532110091743e-07,
84
+ "logits/chosen": -3.0095317363739014,
85
+ "logits/rejected": -3.0367846488952637,
86
+ "logps/chosen": -251.5819854736328,
87
+ "logps/rejected": -78.19547271728516,
88
+ "loss": 0.6784,
89
+ "rewards/accuracies": 0.6499999761581421,
90
+ "rewards/chosen": 0.005416669882833958,
91
+ "rewards/margins": 0.023932188749313354,
92
+ "rewards/rejected": -0.018515516072511673,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.12,
97
  "learning_rate": 4.873853211009174e-07,
98
+ "logits/chosen": -3.0116028785705566,
99
+ "logits/rejected": -3.0300631523132324,
100
+ "logps/chosen": -281.01361083984375,
101
+ "logps/rejected": -75.49365997314453,
102
+ "loss": 0.6715,
103
+ "rewards/accuracies": 0.8125,
104
+ "rewards/chosen": 0.015385298058390617,
105
+ "rewards/margins": 0.050571341067552567,
106
+ "rewards/rejected": -0.0351860448718071,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.14,
111
  "learning_rate": 4.7591743119266054e-07,
112
+ "logits/chosen": -3.0327250957489014,
113
+ "logits/rejected": -3.0184121131896973,
114
+ "logps/chosen": -262.8722229003906,
115
+ "logps/rejected": -71.65990447998047,
116
+ "loss": 0.6649,
117
+ "rewards/accuracies": 0.831250011920929,
118
+ "rewards/chosen": 0.016824517399072647,
119
+ "rewards/margins": 0.06025807186961174,
120
+ "rewards/rejected": -0.043433547019958496,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.16,
125
  "learning_rate": 4.644495412844037e-07,
126
+ "logits/chosen": -3.0364532470703125,
127
+ "logits/rejected": -2.988002300262451,
128
+ "logps/chosen": -254.49423217773438,
129
+ "logps/rejected": -70.27412414550781,
130
+ "loss": 0.6556,
131
+ "rewards/accuracies": 0.8500000238418579,
132
+ "rewards/chosen": 0.022701723501086235,
133
+ "rewards/margins": 0.07623252272605896,
134
+ "rewards/rejected": -0.05353079363703728,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.19,
139
  "learning_rate": 4.5298165137614677e-07,
140
+ "logits/chosen": -3.068497657775879,
141
+ "logits/rejected": -3.0402565002441406,
142
+ "logps/chosen": -266.61614990234375,
143
+ "logps/rejected": -81.87393951416016,
144
+ "loss": 0.6455,
145
+ "rewards/accuracies": 0.8687499761581421,
146
+ "rewards/chosen": 0.026070792227983475,
147
+ "rewards/margins": 0.10358123481273651,
148
+ "rewards/rejected": -0.07751044631004333,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.21,
153
  "learning_rate": 4.4151376146788986e-07,
154
+ "logits/chosen": -3.0521655082702637,
155
+ "logits/rejected": -3.057821750640869,
156
+ "logps/chosen": -286.0577087402344,
157
+ "logps/rejected": -77.96414947509766,
158
+ "loss": 0.6336,
159
+ "rewards/accuracies": 0.949999988079071,
160
+ "rewards/chosen": 0.033475782722234726,
161
+ "rewards/margins": 0.14013811945915222,
162
+ "rewards/rejected": -0.10666234791278839,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 0.23,
167
  "learning_rate": 4.30045871559633e-07,
168
+ "logits/chosen": -3.003532886505127,
169
+ "logits/rejected": -2.995978355407715,
170
+ "logps/chosen": -276.5457458496094,
171
+ "logps/rejected": -80.02079010009766,
172
+ "loss": 0.6234,
173
+ "rewards/accuracies": 0.9375,
174
+ "rewards/chosen": 0.0331401564180851,
175
+ "rewards/margins": 0.14480046927928925,
176
+ "rewards/rejected": -0.11166031658649445,
177
  "step": 110
178
  },
179
  {
180
  "epoch": 0.25,
181
  "learning_rate": 4.1857798165137613e-07,
182
+ "logits/chosen": -3.0330376625061035,
183
+ "logits/rejected": -3.030214548110962,
184
+ "logps/chosen": -276.41632080078125,
185
+ "logps/rejected": -77.67643737792969,
186
+ "loss": 0.6164,
187
+ "rewards/accuracies": 0.9375,
188
+ "rewards/chosen": 0.043682295829057693,
189
+ "rewards/margins": 0.177944153547287,
190
+ "rewards/rejected": -0.1342618763446808,
191
  "step": 120
192
  },
193
  {
194
  "epoch": 0.27,
195
  "learning_rate": 4.071100917431192e-07,
196
+ "logits/chosen": -2.9754703044891357,
197
+ "logits/rejected": -2.9898681640625,
198
+ "logps/chosen": -283.3277587890625,
199
+ "logps/rejected": -83.87138366699219,
200
+ "loss": 0.6121,
201
+ "rewards/accuracies": 0.9312499761581421,
202
+ "rewards/chosen": 0.048630841076374054,
203
+ "rewards/margins": 0.19439519941806793,
204
+ "rewards/rejected": -0.14576435089111328,
205
  "step": 130
206
  },
207
  {
208
  "epoch": 0.29,
209
  "learning_rate": 3.9564220183486236e-07,
210
+ "logits/chosen": -3.0477757453918457,
211
+ "logits/rejected": -3.0237550735473633,
212
+ "logps/chosen": -291.98065185546875,
213
+ "logps/rejected": -82.53144073486328,
214
+ "loss": 0.5997,
215
+ "rewards/accuracies": 0.925000011920929,
216
+ "rewards/chosen": 0.034745730459690094,
217
+ "rewards/margins": 0.20989501476287842,
218
+ "rewards/rejected": -0.17514929175376892,
219
  "step": 140
220
  },
221
  {
222
  "epoch": 0.31,
223
  "learning_rate": 3.841743119266055e-07,
224
+ "logits/chosen": -3.033001661300659,
225
+ "logits/rejected": -3.015845775604248,
226
+ "logps/chosen": -289.15582275390625,
227
+ "logps/rejected": -76.08447265625,
228
+ "loss": 0.5925,
229
+ "rewards/accuracies": 0.9437500238418579,
230
+ "rewards/chosen": 0.0425817035138607,
231
+ "rewards/margins": 0.21189098060131073,
232
+ "rewards/rejected": -0.16930925846099854,
233
  "step": 150
234
  },
235
  {
236
  "epoch": 0.33,
237
  "learning_rate": 3.7270642201834864e-07,
238
+ "logits/chosen": -3.0720551013946533,
239
+ "logits/rejected": -3.0518932342529297,
240
+ "logps/chosen": -271.08258056640625,
241
+ "logps/rejected": -75.97576141357422,
242
+ "loss": 0.5874,
243
+ "rewards/accuracies": 0.956250011920929,
244
+ "rewards/chosen": 0.03000471368432045,
245
+ "rewards/margins": 0.20934228599071503,
246
+ "rewards/rejected": -0.17933759093284607,
247
  "step": 160
248
  },
249
  {
250
  "epoch": 0.35,
251
  "learning_rate": 3.612385321100918e-07,
252
+ "logits/chosen": -3.026865243911743,
253
+ "logits/rejected": -3.030813455581665,
254
+ "logps/chosen": -287.5133361816406,
255
+ "logps/rejected": -77.84892272949219,
256
+ "loss": 0.5811,
257
+ "rewards/accuracies": 0.949999988079071,
258
+ "rewards/chosen": 0.050167638808488846,
259
+ "rewards/margins": 0.24577708542346954,
260
+ "rewards/rejected": -0.1956094205379486,
261
  "step": 170
262
  },
263
  {
264
  "epoch": 0.37,
265
  "learning_rate": 3.497706422018348e-07,
266
+ "logits/chosen": -3.064037322998047,
267
+ "logits/rejected": -3.0434131622314453,
268
+ "logps/chosen": -270.81378173828125,
269
+ "logps/rejected": -78.64222717285156,
270
+ "loss": 0.5708,
271
+ "rewards/accuracies": 0.9750000238418579,
272
+ "rewards/chosen": 0.0572846345603466,
273
+ "rewards/margins": 0.27750909328460693,
274
+ "rewards/rejected": -0.2202244997024536,
275
  "step": 180
276
  },
277
  {
278
  "epoch": 0.39,
279
  "learning_rate": 3.3830275229357795e-07,
280
+ "logits/chosen": -3.0381369590759277,
281
+ "logits/rejected": -3.031832456588745,
282
+ "logps/chosen": -273.7306823730469,
283
+ "logps/rejected": -79.31744384765625,
284
+ "loss": 0.5604,
285
+ "rewards/accuracies": 0.96875,
286
+ "rewards/chosen": 0.05553610250353813,
287
+ "rewards/margins": 0.29081013798713684,
288
+ "rewards/rejected": -0.2352740317583084,
289
  "step": 190
290
  },
291
  {
292
  "epoch": 0.41,
293
  "learning_rate": 3.268348623853211e-07,
294
+ "logits/chosen": -3.036811113357544,
295
+ "logits/rejected": -3.0287680625915527,
296
+ "logps/chosen": -266.4691467285156,
297
+ "logps/rejected": -77.38215637207031,
298
+ "loss": 0.5504,
299
+ "rewards/accuracies": 0.949999988079071,
300
+ "rewards/chosen": 0.08118367195129395,
301
+ "rewards/margins": 0.3425747752189636,
302
+ "rewards/rejected": -0.2613911032676697,
303
  "step": 200
304
  },
305
  {
306
  "epoch": 0.43,
307
  "learning_rate": 3.1536697247706423e-07,
308
+ "logits/chosen": -3.061699867248535,
309
+ "logits/rejected": -3.042888641357422,
310
+ "logps/chosen": -269.961181640625,
311
+ "logps/rejected": -89.21647644042969,
312
+ "loss": 0.5501,
313
+ "rewards/accuracies": 0.956250011920929,
314
+ "rewards/chosen": 0.07142322510480881,
315
+ "rewards/margins": 0.3240587115287781,
316
+ "rewards/rejected": -0.25263547897338867,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 0.45,
321
  "learning_rate": 3.038990825688073e-07,
322
+ "logits/chosen": -3.04771089553833,
323
+ "logits/rejected": -3.018721103668213,
324
+ "logps/chosen": -250.44091796875,
325
+ "logps/rejected": -72.33317565917969,
326
+ "loss": 0.5488,
327
+ "rewards/accuracies": 0.9624999761581421,
328
+ "rewards/chosen": 0.06637217104434967,
329
+ "rewards/margins": 0.3276647627353668,
330
+ "rewards/rejected": -0.26129260659217834,
331
  "step": 220
332
  },
333
  {
334
  "epoch": 0.47,
335
  "learning_rate": 2.9243119266055045e-07,
336
+ "logits/chosen": -2.9626972675323486,
337
+ "logits/rejected": -2.9827158451080322,
338
+ "logps/chosen": -293.9212646484375,
339
+ "logps/rejected": -72.2821044921875,
340
+ "loss": 0.5313,
341
+ "rewards/accuracies": 0.981249988079071,
342
+ "rewards/chosen": 0.08349540829658508,
343
+ "rewards/margins": 0.3892216682434082,
344
+ "rewards/rejected": -0.30572623014450073,
345
  "step": 230
346
  },
347
  {
348
  "epoch": 0.49,
349
  "learning_rate": 2.809633027522936e-07,
350
+ "logits/chosen": -3.034790277481079,
351
+ "logits/rejected": -3.016634225845337,
352
+ "logps/chosen": -280.6105651855469,
353
+ "logps/rejected": -76.09197235107422,
354
+ "loss": 0.5333,
355
+ "rewards/accuracies": 0.9375,
356
+ "rewards/chosen": 0.08378176391124725,
357
+ "rewards/margins": 0.4068339467048645,
358
+ "rewards/rejected": -0.32305219769477844,
359
  "step": 240
360
  },
361
  {
362
  "epoch": 0.52,
363
  "learning_rate": 2.6949541284403673e-07,
364
+ "logits/chosen": -3.0789849758148193,
365
+ "logits/rejected": -3.0785841941833496,
366
+ "logps/chosen": -264.5536804199219,
367
+ "logps/rejected": -82.22047424316406,
368
+ "loss": 0.5282,
369
+ "rewards/accuracies": 0.9624999761581421,
370
+ "rewards/chosen": 0.06328760087490082,
371
+ "rewards/margins": 0.40200409293174744,
372
+ "rewards/rejected": -0.3387165069580078,
373
  "step": 250
374
  },
375
  {
376
  "epoch": 0.54,
377
  "learning_rate": 2.5802752293577976e-07,
378
+ "logits/chosen": -2.9741625785827637,
379
+ "logits/rejected": -2.9866743087768555,
380
+ "logps/chosen": -282.30902099609375,
381
+ "logps/rejected": -70.76858520507812,
382
+ "loss": 0.5277,
383
+ "rewards/accuracies": 0.9312499761581421,
384
+ "rewards/chosen": 0.10191468149423599,
385
+ "rewards/margins": 0.39590951800346375,
386
+ "rewards/rejected": -0.29399481415748596,
387
  "step": 260
388
  },
389
  {
390
  "epoch": 0.56,
391
  "learning_rate": 2.465596330275229e-07,
392
+ "logits/chosen": -3.032557964324951,
393
+ "logits/rejected": -3.03240704536438,
394
+ "logps/chosen": -274.0851135253906,
395
+ "logps/rejected": -86.98384094238281,
396
+ "loss": 0.5135,
397
+ "rewards/accuracies": 0.9375,
398
+ "rewards/chosen": 0.07479412853717804,
399
+ "rewards/margins": 0.4109489321708679,
400
+ "rewards/rejected": -0.3361548185348511,
401
  "step": 270
402
  },
403
  {
404
  "epoch": 0.58,
405
  "learning_rate": 2.3509174311926604e-07,
406
+ "logits/chosen": -3.060285806655884,
407
+ "logits/rejected": -2.9775302410125732,
408
+ "logps/chosen": -253.785888671875,
409
+ "logps/rejected": -70.39444732666016,
410
+ "loss": 0.5183,
411
+ "rewards/accuracies": 0.9624999761581421,
412
+ "rewards/chosen": 0.07235217839479446,
413
+ "rewards/margins": 0.3860532343387604,
414
+ "rewards/rejected": -0.31370100378990173,
415
  "step": 280
416
  },
417
  {
418
  "epoch": 0.6,
419
  "learning_rate": 2.2362385321100916e-07,
420
+ "logits/chosen": -3.029343843460083,
421
+ "logits/rejected": -3.0406129360198975,
422
+ "logps/chosen": -276.57196044921875,
423
+ "logps/rejected": -84.54597473144531,
424
+ "loss": 0.5107,
425
+ "rewards/accuracies": 0.9437500238418579,
426
+ "rewards/chosen": 0.08857797086238861,
427
+ "rewards/margins": 0.4803849756717682,
428
+ "rewards/rejected": -0.3918069899082184,
429
  "step": 290
430
  },
431
  {
432
  "epoch": 0.62,
433
  "learning_rate": 2.121559633027523e-07,
434
+ "logits/chosen": -2.9938578605651855,
435
+ "logits/rejected": -2.9954426288604736,
436
+ "logps/chosen": -273.7822265625,
437
+ "logps/rejected": -77.98421478271484,
438
+ "loss": 0.5079,
439
+ "rewards/accuracies": 0.956250011920929,
440
+ "rewards/chosen": 0.08799968659877777,
441
+ "rewards/margins": 0.40502768754959106,
442
+ "rewards/rejected": -0.3170279860496521,
443
  "step": 300
444
  },
445
  {
446
  "epoch": 0.64,
447
  "learning_rate": 2.0068807339449538e-07,
448
+ "logits/chosen": -3.052614212036133,
449
+ "logits/rejected": -3.0461201667785645,
450
+ "logps/chosen": -281.28814697265625,
451
+ "logps/rejected": -81.84606170654297,
452
+ "loss": 0.5038,
453
+ "rewards/accuracies": 0.956250011920929,
454
+ "rewards/chosen": 0.05326849967241287,
455
+ "rewards/margins": 0.46244749426841736,
456
+ "rewards/rejected": -0.4091789722442627,
457
  "step": 310
458
  },
459
  {
460
  "epoch": 0.66,
461
  "learning_rate": 1.8922018348623852e-07,
462
+ "logits/chosen": -3.031501054763794,
463
+ "logits/rejected": -3.042961597442627,
464
+ "logps/chosen": -271.274658203125,
465
+ "logps/rejected": -87.3827133178711,
466
+ "loss": 0.5003,
467
+ "rewards/accuracies": 0.9624999761581421,
468
+ "rewards/chosen": 0.07084844261407852,
469
+ "rewards/margins": 0.445441871881485,
470
+ "rewards/rejected": -0.37459343671798706,
471
  "step": 320
472
  },
473
  {
474
  "epoch": 0.68,
475
  "learning_rate": 1.7775229357798163e-07,
476
+ "logits/chosen": -3.0476019382476807,
477
+ "logits/rejected": -3.0447893142700195,
478
+ "logps/chosen": -249.735595703125,
479
+ "logps/rejected": -73.10395812988281,
480
+ "loss": 0.4976,
481
+ "rewards/accuracies": 0.925000011920929,
482
+ "rewards/chosen": 0.06198754906654358,
483
+ "rewards/margins": 0.43834322690963745,
484
+ "rewards/rejected": -0.37635567784309387,
485
  "step": 330
486
  },
487
  {
488
  "epoch": 0.7,
489
  "learning_rate": 1.6628440366972477e-07,
490
+ "logits/chosen": -3.055901288986206,
491
+ "logits/rejected": -3.0517029762268066,
492
+ "logps/chosen": -273.3477478027344,
493
+ "logps/rejected": -85.53290557861328,
494
+ "loss": 0.496,
495
+ "rewards/accuracies": 0.987500011920929,
496
+ "rewards/chosen": 0.08338963240385056,
497
+ "rewards/margins": 0.5042273998260498,
498
+ "rewards/rejected": -0.42083778977394104,
499
  "step": 340
500
  },
501
  {
502
  "epoch": 0.72,
503
  "learning_rate": 1.5481651376146786e-07,
504
+ "logits/chosen": -3.063744306564331,
505
+ "logits/rejected": -3.066366195678711,
506
+ "logps/chosen": -277.1488952636719,
507
+ "logps/rejected": -88.2572250366211,
508
+ "loss": 0.4931,
509
+ "rewards/accuracies": 0.987500011920929,
510
+ "rewards/chosen": 0.07289155572652817,
511
+ "rewards/margins": 0.5126849412918091,
512
+ "rewards/rejected": -0.4397934079170227,
513
  "step": 350
514
  },
515
  {
516
  "epoch": 0.74,
517
  "learning_rate": 1.43348623853211e-07,
518
+ "logits/chosen": -3.0237436294555664,
519
+ "logits/rejected": -3.0258359909057617,
520
+ "logps/chosen": -292.0096740722656,
521
+ "logps/rejected": -81.93167114257812,
522
+ "loss": 0.4951,
523
+ "rewards/accuracies": 0.9937499761581421,
524
+ "rewards/chosen": 0.07367613166570663,
525
+ "rewards/margins": 0.49797001481056213,
526
+ "rewards/rejected": -0.4242939352989197,
527
  "step": 360
528
  },
529
  {
530
  "epoch": 0.76,
531
  "learning_rate": 1.318807339449541e-07,
532
+ "logits/chosen": -2.9882092475891113,
533
+ "logits/rejected": -2.9637956619262695,
534
+ "logps/chosen": -274.551513671875,
535
+ "logps/rejected": -73.8973388671875,
536
+ "loss": 0.496,
537
+ "rewards/accuracies": 0.949999988079071,
538
+ "rewards/chosen": 0.0880483016371727,
539
+ "rewards/margins": 0.49274787306785583,
540
+ "rewards/rejected": -0.4046996533870697,
541
  "step": 370
542
  },
543
  {
544
  "epoch": 0.78,
545
  "learning_rate": 1.2041284403669725e-07,
546
+ "logits/chosen": -3.070621967315674,
547
+ "logits/rejected": -3.0683789253234863,
548
+ "logps/chosen": -266.607177734375,
549
+ "logps/rejected": -81.02775573730469,
550
+ "loss": 0.493,
551
+ "rewards/accuracies": 0.9750000238418579,
552
+ "rewards/chosen": 0.10891600698232651,
553
+ "rewards/margins": 0.5303564071655273,
554
+ "rewards/rejected": -0.42144036293029785,
555
  "step": 380
556
  },
557
  {
558
  "epoch": 0.8,
559
  "learning_rate": 1.0894495412844036e-07,
560
+ "logits/chosen": -3.0497114658355713,
561
+ "logits/rejected": -3.053192615509033,
562
+ "logps/chosen": -280.43218994140625,
563
+ "logps/rejected": -80.42735290527344,
564
+ "loss": 0.4892,
565
+ "rewards/accuracies": 0.9375,
566
+ "rewards/chosen": 0.10893626511096954,
567
+ "rewards/margins": 0.5605167746543884,
568
+ "rewards/rejected": -0.4515805244445801,
569
  "step": 390
570
  },
571
  {
572
  "epoch": 0.82,
573
  "learning_rate": 9.747706422018348e-08,
574
+ "logits/chosen": -3.002933979034424,
575
+ "logits/rejected": -3.0063657760620117,
576
+ "logps/chosen": -241.24276733398438,
577
+ "logps/rejected": -75.92924499511719,
578
+ "loss": 0.4833,
579
+ "rewards/accuracies": 0.956250011920929,
580
+ "rewards/chosen": 0.07781459391117096,
581
+ "rewards/margins": 0.46425342559814453,
582
+ "rewards/rejected": -0.38643890619277954,
583
  "step": 400
584
  },
585
  {
586
  "epoch": 0.85,
587
  "learning_rate": 8.60091743119266e-08,
588
+ "logits/chosen": -3.0454163551330566,
589
+ "logits/rejected": -3.035583972930908,
590
+ "logps/chosen": -264.18585205078125,
591
+ "logps/rejected": -78.031982421875,
592
+ "loss": 0.4744,
593
+ "rewards/accuracies": 0.987500011920929,
594
+ "rewards/chosen": 0.09802711009979248,
595
+ "rewards/margins": 0.5436574816703796,
596
+ "rewards/rejected": -0.44563040137290955,
597
  "step": 410
598
  },
599
  {
600
  "epoch": 0.87,
601
  "learning_rate": 7.454128440366971e-08,
602
+ "logits/chosen": -3.0196666717529297,
603
+ "logits/rejected": -3.0026302337646484,
604
+ "logps/chosen": -272.02630615234375,
605
+ "logps/rejected": -82.01240539550781,
606
+ "loss": 0.481,
607
+ "rewards/accuracies": 0.956250011920929,
608
+ "rewards/chosen": 0.08279488980770111,
609
+ "rewards/margins": 0.5704164505004883,
610
+ "rewards/rejected": -0.48762160539627075,
611
  "step": 420
612
  },
613
  {
614
  "epoch": 0.89,
615
  "learning_rate": 6.307339449541284e-08,
616
+ "logits/chosen": -3.0509345531463623,
617
+ "logits/rejected": -3.0137345790863037,
618
+ "logps/chosen": -262.2018127441406,
619
+ "logps/rejected": -77.63418579101562,
620
+ "loss": 0.4731,
621
+ "rewards/accuracies": 0.9750000238418579,
622
+ "rewards/chosen": 0.1073322519659996,
623
+ "rewards/margins": 0.5776056051254272,
624
+ "rewards/rejected": -0.4702734053134918,
625
  "step": 430
626
  },
627
  {
628
  "epoch": 0.91,
629
  "learning_rate": 5.1605504587155966e-08,
630
+ "logits/chosen": -3.0285000801086426,
631
+ "logits/rejected": -3.0236475467681885,
632
+ "logps/chosen": -266.83599853515625,
633
+ "logps/rejected": -77.38362121582031,
634
+ "loss": 0.476,
635
+ "rewards/accuracies": 0.9437500238418579,
636
+ "rewards/chosen": 0.08291526138782501,
637
+ "rewards/margins": 0.4984784722328186,
638
+ "rewards/rejected": -0.41556310653686523,
639
  "step": 440
640
  },
641
  {
642
  "epoch": 0.93,
643
  "learning_rate": 4.0137614678899086e-08,
644
+ "logits/chosen": -3.02640438079834,
645
+ "logits/rejected": -3.011373996734619,
646
+ "logps/chosen": -295.5868835449219,
647
+ "logps/rejected": -80.76414489746094,
648
+ "loss": 0.4707,
649
+ "rewards/accuracies": 0.96875,
650
+ "rewards/chosen": 0.09663239866495132,
651
+ "rewards/margins": 0.5815601944923401,
652
+ "rewards/rejected": -0.48492780327796936,
653
  "step": 450
654
  },
655
  {
656
  "epoch": 0.95,
657
  "learning_rate": 2.86697247706422e-08,
658
+ "logits/chosen": -3.0195059776306152,
659
+ "logits/rejected": -2.988323926925659,
660
+ "logps/chosen": -300.5026550292969,
661
+ "logps/rejected": -86.79838562011719,
662
+ "loss": 0.4808,
663
+ "rewards/accuracies": 0.96875,
664
+ "rewards/chosen": 0.11054690927267075,
665
+ "rewards/margins": 0.5899176001548767,
666
+ "rewards/rejected": -0.47937074303627014,
667
  "step": 460
668
  },
669
  {
670
  "epoch": 0.97,
671
  "learning_rate": 1.720183486238532e-08,
672
+ "logits/chosen": -3.0426931381225586,
673
+ "logits/rejected": -3.0394179821014404,
674
+ "logps/chosen": -235.52706909179688,
675
+ "logps/rejected": -73.9857406616211,
676
+ "loss": 0.4819,
677
+ "rewards/accuracies": 0.956250011920929,
678
+ "rewards/chosen": 0.08785500377416611,
679
+ "rewards/margins": 0.5274263620376587,
680
+ "rewards/rejected": -0.4395713806152344,
681
  "step": 470
682
  },
683
  {
684
  "epoch": 0.99,
685
  "learning_rate": 5.73394495412844e-09,
686
+ "logits/chosen": -3.0092616081237793,
687
+ "logits/rejected": -2.972731590270996,
688
+ "logps/chosen": -249.88876342773438,
689
+ "logps/rejected": -85.80451965332031,
690
+ "loss": 0.482,
691
+ "rewards/accuracies": 0.96875,
692
+ "rewards/chosen": 0.07512323558330536,
693
+ "rewards/margins": 0.5230099558830261,
694
+ "rewards/rejected": -0.44788676500320435,
695
  "step": 480
696
  },
697
  {
698
  "epoch": 1.0,
699
+ "eval_logits/chosen": -3.034407377243042,
700
+ "eval_logits/rejected": -3.069913864135742,
701
+ "eval_logps/chosen": -271.40020751953125,
702
+ "eval_logps/rejected": -175.5244140625,
703
+ "eval_loss": 0.5650191903114319,
704
+ "eval_rewards/accuracies": 0.76953125,
705
+ "eval_rewards/chosen": 0.08157022297382355,
706
+ "eval_rewards/margins": 0.33799096941947937,
707
+ "eval_rewards/rejected": -0.25642073154449463,
708
+ "eval_runtime": 256.4523,
709
+ "eval_samples_per_second": 7.799,
710
+ "eval_steps_per_second": 0.062,
711
  "step": 485
712
  },
713
  {
714
  "epoch": 1.0,
715
  "step": 485,
716
  "total_flos": 0.0,
717
+ "train_loss": 0.5539181610972611,
718
+ "train_runtime": 15602.6148,
719
+ "train_samples_per_second": 3.978,
720
  "train_steps_per_second": 0.031
721
  }
722
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e4c37c4ba74a7c42b124e93bdaa61e543cd3533851bf4e87301d3ef2e466cd
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:178ca2e9659218d5d6a040bc9b839def7f1e2d04d37ea8a694c883bf79442126
3
  size 5688