prd-nguyenvo committed on
Commit
008b9e4
1 Parent(s): 21eb29d

Model save

Browse files
README.md CHANGED
@@ -15,15 +15,15 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [Open-Orca/Mistral-7B-OpenOrca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.1299
19
- - Rewards/chosen: -2.1998
20
- - Rewards/rejected: -6.8894
21
- - Rewards/accuracies: 0.9521
22
- - Rewards/margins: 4.6897
23
- - Logps/rejected: -133.8546
24
- - Logps/chosen: -85.1177
25
- - Logits/rejected: -2.5567
26
- - Logits/chosen: -2.5727
27
 
28
  ## Model description
29
 
@@ -42,7 +42,7 @@ More information needed
42
  ### Training hyperparameters
43
 
44
  The following hyperparameters were used during training:
45
- - learning_rate: 5e-06
46
  - train_batch_size: 1
47
  - eval_batch_size: 1
48
  - seed: 42
@@ -52,14 +52,13 @@ The following hyperparameters were used during training:
52
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
53
  - lr_scheduler_type: linear
54
  - lr_scheduler_warmup_ratio: 0.1
55
- - num_epochs: 2
56
 
57
  ### Training results
58
 
59
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
60
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
61
- | 0.1534 | 1.0 | 109 | 0.1544 | -1.6908 | -5.4640 | 0.9395 | 3.7732 | -119.6004 | -80.0284 | -2.5938 | -2.6084 |
62
- | 0.0995 | 1.99 | 218 | 0.1299 | -2.1998 | -6.8894 | 0.9521 | 4.6897 | -133.8546 | -85.1177 | -2.5567 | -2.5727 |
63
 
64
 
65
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [Open-Orca/Mistral-7B-OpenOrca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.2452
19
+ - Rewards/chosen: -0.7312
20
+ - Rewards/rejected: -2.7785
21
+ - Rewards/accuracies: 0.9132
22
+ - Rewards/margins: 2.0473
23
+ - Logps/rejected: -92.7458
24
+ - Logps/chosen: -70.4321
25
+ - Logits/rejected: -2.6590
26
+ - Logits/chosen: -2.6728
27
 
28
  ## Model description
29
 
 
42
  ### Training hyperparameters
43
 
44
  The following hyperparameters were used during training:
45
+ - learning_rate: 3e-06
46
  - train_batch_size: 1
47
  - eval_batch_size: 1
48
  - seed: 42
 
52
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
53
  - lr_scheduler_type: linear
54
  - lr_scheduler_warmup_ratio: 0.1
55
+ - num_epochs: 1
56
 
57
  ### Training results
58
 
59
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
60
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
61
+ | 0.2434 | 1.0 | 109 | 0.2452 | -0.7312 | -2.7785 | 0.9132 | 2.0473 | -92.7458 | -70.4321 | -2.6590 | -2.6728 |
 
62
 
63
 
64
  ### Framework versions
adapter_config.json CHANGED
@@ -16,8 +16,8 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "q_proj",
20
  "v_proj",
 
21
  "k_proj",
22
  "o_proj"
23
  ],
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
19
  "v_proj",
20
+ "q_proj",
21
  "k_proj",
22
  "o_proj"
23
  ],
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ded8821d3aefde9251de2b67e8473a6521245ba464d191e6179682e7916ff7e3
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01b81fb6d0db8862e194bf6369b940209b779c3b8b68f5984b4b4c0e6b117ef8
3
  size 218138576
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "epoch": 1.99,
3
- "eval_logits/chosen": -2.5726864337921143,
4
- "eval_logits/rejected": -2.556718587875366,
5
- "eval_logps/chosen": -85.11767578125,
6
- "eval_logps/rejected": -133.85464477539062,
7
- "eval_loss": 0.12992651760578156,
8
- "eval_rewards/accuracies": 0.9520547986030579,
9
- "eval_rewards/chosen": -2.199751138687134,
10
- "eval_rewards/margins": 4.689663887023926,
11
- "eval_rewards/rejected": -6.889414310455322,
12
- "eval_runtime": 288.3554,
13
  "eval_samples": 876,
14
- "eval_samples_per_second": 3.038,
15
- "eval_steps_per_second": 3.038,
16
- "train_loss": 0.20939183043777396,
17
- "train_runtime": 5162.9911,
18
  "train_samples": 3505,
19
- "train_samples_per_second": 1.358,
20
  "train_steps_per_second": 0.042
21
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.6727941036224365,
4
+ "eval_logits/rejected": -2.6590042114257812,
5
+ "eval_logps/chosen": -70.43208312988281,
6
+ "eval_logps/rejected": -92.74579620361328,
7
+ "eval_loss": 0.24523001909255981,
8
+ "eval_rewards/accuracies": 0.913241982460022,
9
+ "eval_rewards/chosen": -0.7311916947364807,
10
+ "eval_rewards/margins": 2.0473380088806152,
11
+ "eval_rewards/rejected": -2.778529405593872,
12
+ "eval_runtime": 288.4496,
13
  "eval_samples": 876,
14
+ "eval_samples_per_second": 3.037,
15
+ "eval_steps_per_second": 3.037,
16
+ "train_loss": 0.3616804119643815,
17
+ "train_runtime": 2568.179,
18
  "train_samples": 3505,
19
+ "train_samples_per_second": 1.365,
20
  "train_steps_per_second": 0.042
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 1.99,
3
- "eval_logits/chosen": -2.5726864337921143,
4
- "eval_logits/rejected": -2.556718587875366,
5
- "eval_logps/chosen": -85.11767578125,
6
- "eval_logps/rejected": -133.85464477539062,
7
- "eval_loss": 0.12992651760578156,
8
- "eval_rewards/accuracies": 0.9520547986030579,
9
- "eval_rewards/chosen": -2.199751138687134,
10
- "eval_rewards/margins": 4.689663887023926,
11
- "eval_rewards/rejected": -6.889414310455322,
12
- "eval_runtime": 288.3554,
13
  "eval_samples": 876,
14
- "eval_samples_per_second": 3.038,
15
- "eval_steps_per_second": 3.038
16
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -2.6727941036224365,
4
+ "eval_logits/rejected": -2.6590042114257812,
5
+ "eval_logps/chosen": -70.43208312988281,
6
+ "eval_logps/rejected": -92.74579620361328,
7
+ "eval_loss": 0.24523001909255981,
8
+ "eval_rewards/accuracies": 0.913241982460022,
9
+ "eval_rewards/chosen": -0.7311916947364807,
10
+ "eval_rewards/margins": 2.0473380088806152,
11
+ "eval_rewards/rejected": -2.778529405593872,
12
+ "eval_runtime": 288.4496,
13
  "eval_samples": 876,
14
+ "eval_samples_per_second": 3.037,
15
+ "eval_steps_per_second": 3.037
16
  }
runs/Dec19_13-43-08_desktop-linux-nlp1/events.out.tfevents.1702968267.desktop-linux-nlp1.3536715.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60466be35623e70dc3691c1104226dc2a80db095f5212e7c9af57deb060ff573
3
+ size 8728
runs/Dec19_14-07-27_desktop-linux-nlp1/events.out.tfevents.1702969726.desktop-linux-nlp1.3564065.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e7926b8e663de8bfdb43712f5d79ebe4c0bed2d49bf25f3b09342bea3775c81
3
+ size 12295
runs/Dec19_14-07-27_desktop-linux-nlp1/events.out.tfevents.1702972583.desktop-linux-nlp1.3564065.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac0fb1689d947adadb16134d5efc503ad3add478f41c6063f111a1a8520d1356
3
+ size 815
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.99,
3
- "train_loss": 0.20939183043777396,
4
- "train_runtime": 5162.9911,
5
  "train_samples": 3505,
6
- "train_samples_per_second": 1.358,
7
  "train_steps_per_second": 0.042
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.3616804119643815,
4
+ "train_runtime": 2568.179,
5
  "train_samples": 3505,
6
+ "train_samples_per_second": 1.365,
7
  "train_steps_per_second": 0.042
8
  }
trainer_state.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9902995720399428,
5
  "eval_steps": 100,
6
- "global_step": 218,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
- "learning_rate": 2.2727272727272729e-07,
14
  "logits/chosen": -2.779836893081665,
15
  "logits/rejected": -2.772892951965332,
16
  "logps/chosen": -67.39044952392578,
@@ -24,343 +24,173 @@
24
  },
25
  {
26
  "epoch": 0.09,
27
- "learning_rate": 2.2727272727272728e-06,
28
- "logits/chosen": -2.764805793762207,
29
- "logits/rejected": -2.7586262226104736,
30
- "logps/chosen": -63.05746841430664,
31
- "logps/rejected": -64.96013641357422,
32
- "loss": 0.6864,
33
- "rewards/accuracies": 0.6006944179534912,
34
- "rewards/chosen": 0.009925955906510353,
35
- "rewards/margins": 0.013827367685735226,
36
- "rewards/rejected": -0.0039014113135635853,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.18,
41
- "learning_rate": 4.5454545454545455e-06,
42
- "logits/chosen": -2.7655444145202637,
43
- "logits/rejected": -2.7531120777130127,
44
- "logps/chosen": -60.976318359375,
45
- "logps/rejected": -64.35781860351562,
46
- "loss": 0.5979,
47
- "rewards/accuracies": 0.8500000238418579,
48
- "rewards/chosen": 0.06535812467336655,
49
- "rewards/margins": 0.22067773342132568,
50
- "rewards/rejected": -0.15531960129737854,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.27,
55
- "learning_rate": 4.795918367346939e-06,
56
- "logits/chosen": -2.7331104278564453,
57
- "logits/rejected": -2.722367763519287,
58
- "logps/chosen": -63.54418182373047,
59
- "logps/rejected": -77.56448364257812,
60
- "loss": 0.3997,
61
- "rewards/accuracies": 0.878125011920929,
62
- "rewards/chosen": -0.11322204768657684,
63
- "rewards/margins": 0.9385285377502441,
64
- "rewards/rejected": -1.0517505407333374,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.37,
69
- "learning_rate": 4.540816326530613e-06,
70
- "logits/chosen": -2.685359477996826,
71
- "logits/rejected": -2.6721653938293457,
72
- "logps/chosen": -67.8324966430664,
73
- "logps/rejected": -89.94172668457031,
74
- "loss": 0.2768,
75
- "rewards/accuracies": 0.918749988079071,
76
- "rewards/chosen": -0.5856838226318359,
77
- "rewards/margins": 1.7414783239364624,
78
- "rewards/rejected": -2.327162265777588,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.46,
83
- "learning_rate": 4.2857142857142855e-06,
84
- "logits/chosen": -2.660297393798828,
85
- "logits/rejected": -2.6442055702209473,
86
- "logps/chosen": -72.59104919433594,
87
- "logps/rejected": -93.20745849609375,
88
- "loss": 0.2475,
89
- "rewards/accuracies": 0.90625,
90
- "rewards/chosen": -0.7966881394386292,
91
- "rewards/margins": 2.058647632598877,
92
- "rewards/rejected": -2.8553357124328613,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.55,
97
- "learning_rate": 4.03061224489796e-06,
98
- "logits/chosen": -2.643165111541748,
99
- "logits/rejected": -2.63153076171875,
100
- "logps/chosen": -72.09125518798828,
101
- "logps/rejected": -99.36156463623047,
102
- "loss": 0.2392,
103
- "rewards/accuracies": 0.903124988079071,
104
- "rewards/chosen": -1.083268404006958,
105
- "rewards/margins": 2.3197226524353027,
106
- "rewards/rejected": -3.4029908180236816,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.64,
111
- "learning_rate": 3.7755102040816327e-06,
112
- "logits/chosen": -2.638352632522583,
113
- "logits/rejected": -2.6236445903778076,
114
- "logps/chosen": -77.40001678466797,
115
- "logps/rejected": -107.3912124633789,
116
- "loss": 0.2044,
117
- "rewards/accuracies": 0.918749988079071,
118
- "rewards/chosen": -1.5230330228805542,
119
- "rewards/margins": 2.718259572982788,
120
- "rewards/rejected": -4.241292476654053,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.73,
125
- "learning_rate": 3.5204081632653062e-06,
126
- "logits/chosen": -2.6177425384521484,
127
- "logits/rejected": -2.597738265991211,
128
- "logps/chosen": -75.93782043457031,
129
- "logps/rejected": -108.6824951171875,
130
- "loss": 0.1696,
131
- "rewards/accuracies": 0.9375,
132
- "rewards/chosen": -1.198141098022461,
133
- "rewards/margins": 3.195936918258667,
134
- "rewards/rejected": -4.394078254699707,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.82,
139
- "learning_rate": 3.2653061224489794e-06,
140
- "logits/chosen": -2.6300315856933594,
141
- "logits/rejected": -2.6183547973632812,
142
- "logps/chosen": -80.39871978759766,
143
- "logps/rejected": -113.6379165649414,
144
- "loss": 0.1925,
145
  "rewards/accuracies": 0.8968750238418579,
146
- "rewards/chosen": -1.5853779315948486,
147
- "rewards/margins": 3.1853203773498535,
148
- "rewards/rejected": -4.770698547363281,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.91,
153
- "learning_rate": 3.0102040816326534e-06,
154
- "logits/chosen": -2.6064066886901855,
155
- "logits/rejected": -2.594722270965576,
156
- "logps/chosen": -81.89433288574219,
157
- "logps/rejected": -115.00162506103516,
158
- "loss": 0.1534,
159
- "rewards/accuracies": 0.9468749761581421,
160
- "rewards/chosen": -1.6648212671279907,
161
- "rewards/margins": 3.4960713386535645,
162
- "rewards/rejected": -5.160892486572266,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 1.0,
167
- "eval_logits/chosen": -2.6083757877349854,
168
- "eval_logits/rejected": -2.5937814712524414,
169
- "eval_logps/chosen": -80.02838134765625,
170
- "eval_logps/rejected": -119.6004409790039,
171
- "eval_loss": 0.15436382591724396,
172
- "eval_rewards/accuracies": 0.939497709274292,
173
- "eval_rewards/chosen": -1.6908209323883057,
174
- "eval_rewards/margins": 3.7731716632843018,
175
- "eval_rewards/rejected": -5.463992595672607,
176
- "eval_runtime": 295.1725,
177
- "eval_samples_per_second": 2.968,
178
- "eval_steps_per_second": 2.968,
179
- "step": 109
180
- },
181
- {
182
- "epoch": 1.0,
183
- "learning_rate": 2.7551020408163266e-06,
184
- "logits/chosen": -2.6033756732940674,
185
- "logits/rejected": -2.582958221435547,
186
- "logps/chosen": -80.38322448730469,
187
- "logps/rejected": -116.6507797241211,
188
- "loss": 0.138,
189
- "rewards/accuracies": 0.949999988079071,
190
- "rewards/chosen": -1.656935691833496,
191
- "rewards/margins": 3.698695659637451,
192
- "rewards/rejected": -5.3556318283081055,
193
- "step": 110
194
- },
195
- {
196
- "epoch": 1.1,
197
- "learning_rate": 2.5e-06,
198
- "logits/chosen": -2.604905843734741,
199
- "logits/rejected": -2.591404438018799,
200
- "logps/chosen": -80.86669921875,
201
- "logps/rejected": -119.6518783569336,
202
- "loss": 0.1395,
203
- "rewards/accuracies": 0.9437500238418579,
204
- "rewards/chosen": -1.803047776222229,
205
- "rewards/margins": 3.7810962200164795,
206
- "rewards/rejected": -5.58414363861084,
207
- "step": 120
208
- },
209
- {
210
- "epoch": 1.19,
211
- "learning_rate": 2.244897959183674e-06,
212
- "logits/chosen": -2.590282440185547,
213
- "logits/rejected": -2.576897144317627,
214
- "logps/chosen": -83.53189086914062,
215
- "logps/rejected": -131.67037963867188,
216
- "loss": 0.1004,
217
- "rewards/accuracies": 0.981249988079071,
218
- "rewards/chosen": -1.9269969463348389,
219
- "rewards/margins": 4.388735294342041,
220
- "rewards/rejected": -6.315732002258301,
221
- "step": 130
222
- },
223
- {
224
- "epoch": 1.28,
225
- "learning_rate": 1.989795918367347e-06,
226
- "logits/chosen": -2.578322410583496,
227
- "logits/rejected": -2.5577735900878906,
228
- "logps/chosen": -85.14395141601562,
229
- "logps/rejected": -126.8256607055664,
230
- "loss": 0.1422,
231
- "rewards/accuracies": 0.9375,
232
- "rewards/chosen": -2.2692408561706543,
233
- "rewards/margins": 4.1861348152160645,
234
- "rewards/rejected": -6.455375671386719,
235
- "step": 140
236
- },
237
- {
238
- "epoch": 1.37,
239
- "learning_rate": 1.7346938775510206e-06,
240
- "logits/chosen": -2.5812907218933105,
241
- "logits/rejected": -2.5663979053497314,
242
- "logps/chosen": -87.79288482666016,
243
- "logps/rejected": -131.3497314453125,
244
- "loss": 0.1284,
245
- "rewards/accuracies": 0.949999988079071,
246
- "rewards/chosen": -2.4120209217071533,
247
- "rewards/margins": 4.293553352355957,
248
- "rewards/rejected": -6.705574035644531,
249
- "step": 150
250
- },
251
- {
252
- "epoch": 1.46,
253
- "learning_rate": 1.479591836734694e-06,
254
- "logits/chosen": -2.596059560775757,
255
- "logits/rejected": -2.58156156539917,
256
- "logps/chosen": -83.30199432373047,
257
- "logps/rejected": -132.67092895507812,
258
- "loss": 0.1066,
259
- "rewards/accuracies": 0.9593750238418579,
260
- "rewards/chosen": -2.065701484680176,
261
- "rewards/margins": 4.5842509269714355,
262
- "rewards/rejected": -6.6499528884887695,
263
- "step": 160
264
- },
265
- {
266
- "epoch": 1.55,
267
- "learning_rate": 1.2244897959183673e-06,
268
- "logits/chosen": -2.5813376903533936,
269
- "logits/rejected": -2.569676160812378,
270
- "logps/chosen": -82.94264221191406,
271
- "logps/rejected": -132.35789489746094,
272
- "loss": 0.0994,
273
- "rewards/accuracies": 0.96875,
274
- "rewards/chosen": -2.1285476684570312,
275
- "rewards/margins": 4.649580955505371,
276
- "rewards/rejected": -6.778128147125244,
277
- "step": 170
278
- },
279
- {
280
- "epoch": 1.64,
281
- "learning_rate": 9.69387755102041e-07,
282
- "logits/chosen": -2.575303077697754,
283
- "logits/rejected": -2.5673909187316895,
284
- "logps/chosen": -86.48652648925781,
285
- "logps/rejected": -134.35574340820312,
286
- "loss": 0.1224,
287
- "rewards/accuracies": 0.956250011920929,
288
- "rewards/chosen": -2.278262138366699,
289
- "rewards/margins": 4.526266098022461,
290
- "rewards/rejected": -6.80452823638916,
291
- "step": 180
292
- },
293
- {
294
- "epoch": 1.73,
295
- "learning_rate": 7.142857142857143e-07,
296
- "logits/chosen": -2.5673184394836426,
297
- "logits/rejected": -2.5392918586730957,
298
- "logps/chosen": -85.58756256103516,
299
- "logps/rejected": -128.51121520996094,
300
- "loss": 0.1345,
301
- "rewards/accuracies": 0.9624999761581421,
302
- "rewards/chosen": -2.157008409500122,
303
- "rewards/margins": 4.34613037109375,
304
- "rewards/rejected": -6.503138542175293,
305
- "step": 190
306
- },
307
- {
308
- "epoch": 1.83,
309
- "learning_rate": 4.591836734693878e-07,
310
- "logits/chosen": -2.5774478912353516,
311
- "logits/rejected": -2.556039333343506,
312
- "logps/chosen": -84.05760955810547,
313
- "logps/rejected": -132.11903381347656,
314
- "loss": 0.1088,
315
- "rewards/accuracies": 0.9593750238418579,
316
- "rewards/chosen": -2.128478527069092,
317
- "rewards/margins": 4.659018039703369,
318
- "rewards/rejected": -6.787497043609619,
319
- "step": 200
320
- },
321
- {
322
- "epoch": 1.92,
323
- "learning_rate": 2.0408163265306121e-07,
324
- "logits/chosen": -2.563249111175537,
325
- "logits/rejected": -2.5445797443389893,
326
- "logps/chosen": -83.83250427246094,
327
- "logps/rejected": -133.15673828125,
328
- "loss": 0.0995,
329
- "rewards/accuracies": 0.965624988079071,
330
- "rewards/chosen": -2.065305709838867,
331
- "rewards/margins": 4.883781909942627,
332
- "rewards/rejected": -6.949087619781494,
333
- "step": 210
334
- },
335
- {
336
- "epoch": 1.99,
337
- "eval_logits/chosen": -2.5726864337921143,
338
- "eval_logits/rejected": -2.556718587875366,
339
- "eval_logps/chosen": -85.11767578125,
340
- "eval_logps/rejected": -133.85464477539062,
341
- "eval_loss": 0.12992651760578156,
342
- "eval_rewards/accuracies": 0.9520547986030579,
343
- "eval_rewards/chosen": -2.199751138687134,
344
- "eval_rewards/margins": 4.689663887023926,
345
- "eval_rewards/rejected": -6.889414310455322,
346
- "eval_runtime": 288.5007,
347
  "eval_samples_per_second": 3.036,
348
  "eval_steps_per_second": 3.036,
349
- "step": 218
350
  },
351
  {
352
- "epoch": 1.99,
353
- "step": 218,
354
  "total_flos": 0.0,
355
- "train_loss": 0.20939183043777396,
356
- "train_runtime": 5162.9911,
357
- "train_samples_per_second": 1.358,
358
  "train_steps_per_second": 0.042
359
  }
360
  ],
361
  "logging_steps": 10,
362
- "max_steps": 218,
363
- "num_train_epochs": 2,
364
  "save_steps": 500,
365
  "total_flos": 0.0,
366
  "trial_name": null,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9951497860199715,
5
  "eval_steps": 100,
6
+ "global_step": 109,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
+ "learning_rate": 2.7272727272727274e-07,
14
  "logits/chosen": -2.779836893081665,
15
  "logits/rejected": -2.772892951965332,
16
  "logps/chosen": -67.39044952392578,
 
24
  },
25
  {
26
  "epoch": 0.09,
27
+ "learning_rate": 2.7272727272727272e-06,
28
+ "logits/chosen": -2.764448881149292,
29
+ "logits/rejected": -2.7583670616149902,
30
+ "logps/chosen": -63.044189453125,
31
+ "logps/rejected": -65.00924682617188,
32
+ "loss": 0.6834,
33
+ "rewards/accuracies": 0.6076388955116272,
34
+ "rewards/chosen": 0.011253755539655685,
35
+ "rewards/margins": 0.020065149292349815,
36
+ "rewards/rejected": -0.008811394684016705,
37
  "step": 10
38
  },
39
  {
40
  "epoch": 0.18,
41
+ "learning_rate": 2.7244897959183674e-06,
42
+ "logits/chosen": -2.7638134956359863,
43
+ "logits/rejected": -2.7512309551239014,
44
+ "logps/chosen": -60.88775634765625,
45
+ "logps/rejected": -64.6989517211914,
46
+ "loss": 0.5807,
47
+ "rewards/accuracies": 0.8656250238418579,
48
+ "rewards/chosen": 0.074214868247509,
49
+ "rewards/margins": 0.26364782452583313,
50
+ "rewards/rejected": -0.18943293392658234,
51
  "step": 20
52
  },
53
  {
54
  "epoch": 0.27,
55
+ "learning_rate": 2.4183673469387754e-06,
56
+ "logits/chosen": -2.7466235160827637,
57
+ "logits/rejected": -2.7364418506622314,
58
+ "logps/chosen": -62.1016845703125,
59
+ "logps/rejected": -73.9842529296875,
60
+ "loss": 0.4425,
61
+ "rewards/accuracies": 0.8812500238418579,
62
+ "rewards/chosen": 0.031027918681502342,
63
+ "rewards/margins": 0.7247552275657654,
64
+ "rewards/rejected": -0.6937273740768433,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.37,
69
+ "learning_rate": 2.112244897959184e-06,
70
+ "logits/chosen": -2.720366954803467,
71
+ "logits/rejected": -2.707810401916504,
72
+ "logps/chosen": -64.14788818359375,
73
+ "logps/rejected": -80.15257263183594,
74
+ "loss": 0.3545,
75
+ "rewards/accuracies": 0.909375011920929,
76
+ "rewards/chosen": -0.217222660779953,
77
+ "rewards/margins": 1.13102388381958,
78
+ "rewards/rejected": -1.348246693611145,
79
  "step": 40
80
  },
81
  {
82
  "epoch": 0.46,
83
+ "learning_rate": 1.806122448979592e-06,
84
+ "logits/chosen": -2.703882932662964,
85
+ "logits/rejected": -2.688373327255249,
86
+ "logps/chosen": -68.16423034667969,
87
+ "logps/rejected": -81.43087005615234,
88
+ "loss": 0.3228,
89
+ "rewards/accuracies": 0.903124988079071,
90
+ "rewards/chosen": -0.3540056347846985,
91
+ "rewards/margins": 1.3236706256866455,
92
+ "rewards/rejected": -1.6776764392852783,
93
  "step": 50
94
  },
95
  {
96
  "epoch": 0.55,
97
+ "learning_rate": 1.5e-06,
98
+ "logits/chosen": -2.6891207695007324,
99
+ "logits/rejected": -2.6766979694366455,
100
+ "logps/chosen": -65.70265197753906,
101
+ "logps/rejected": -84.64549255371094,
102
+ "loss": 0.3174,
103
+ "rewards/accuracies": 0.862500011920929,
104
+ "rewards/chosen": -0.44440922141075134,
105
+ "rewards/margins": 1.486973762512207,
106
+ "rewards/rejected": -1.9313831329345703,
107
  "step": 60
108
  },
109
  {
110
  "epoch": 0.64,
111
+ "learning_rate": 1.193877551020408e-06,
112
+ "logits/chosen": -2.696516275405884,
113
+ "logits/rejected": -2.6814627647399902,
114
+ "logps/chosen": -68.18341827392578,
115
+ "logps/rejected": -88.01350402832031,
116
+ "loss": 0.2817,
117
+ "rewards/accuracies": 0.903124988079071,
118
+ "rewards/chosen": -0.6013726592063904,
119
+ "rewards/margins": 1.7021477222442627,
120
+ "rewards/rejected": -2.303520441055298,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 0.73,
125
+ "learning_rate": 8.877551020408164e-07,
126
+ "logits/chosen": -2.680356025695801,
127
+ "logits/rejected": -2.662485122680664,
128
+ "logps/chosen": -69.7511978149414,
129
+ "logps/rejected": -89.8312759399414,
130
+ "loss": 0.2427,
131
+ "rewards/accuracies": 0.921875,
132
+ "rewards/chosen": -0.5794776678085327,
133
+ "rewards/margins": 1.9294792413711548,
134
+ "rewards/rejected": -2.5089569091796875,
135
  "step": 80
136
  },
137
  {
138
  "epoch": 0.82,
139
+ "learning_rate": 5.816326530612245e-07,
140
+ "logits/chosen": -2.6861064434051514,
141
+ "logits/rejected": -2.6741652488708496,
142
+ "logps/chosen": -71.44612884521484,
143
+ "logps/rejected": -92.25032806396484,
144
+ "loss": 0.257,
145
  "rewards/accuracies": 0.8968750238418579,
146
+ "rewards/chosen": -0.6901184320449829,
147
+ "rewards/margins": 1.9418220520019531,
148
+ "rewards/rejected": -2.6319406032562256,
149
  "step": 90
150
  },
151
  {
152
  "epoch": 0.91,
153
+ "learning_rate": 2.7551020408163265e-07,
154
+ "logits/chosen": -2.6638271808624268,
155
+ "logits/rejected": -2.6519863605499268,
156
+ "logps/chosen": -72.42386627197266,
157
+ "logps/rejected": -90.99058532714844,
158
+ "loss": 0.2434,
159
+ "rewards/accuracies": 0.9281250238418579,
160
+ "rewards/chosen": -0.7177737355232239,
161
+ "rewards/margins": 2.0420150756835938,
162
+ "rewards/rejected": -2.759788751602173,
163
  "step": 100
164
  },
165
  {
166
  "epoch": 1.0,
167
+ "eval_logits/chosen": -2.6727941036224365,
168
+ "eval_logits/rejected": -2.6590042114257812,
169
+ "eval_logps/chosen": -70.43208312988281,
170
+ "eval_logps/rejected": -92.74579620361328,
171
+ "eval_loss": 0.24523001909255981,
172
+ "eval_rewards/accuracies": 0.913241982460022,
173
+ "eval_rewards/chosen": -0.7311916947364807,
174
+ "eval_rewards/margins": 2.0473380088806152,
175
+ "eval_rewards/rejected": -2.778529405593872,
176
+ "eval_runtime": 288.5183,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  "eval_samples_per_second": 3.036,
178
  "eval_steps_per_second": 3.036,
179
+ "step": 109
180
  },
181
  {
182
+ "epoch": 1.0,
183
+ "step": 109,
184
  "total_flos": 0.0,
185
+ "train_loss": 0.3616804119643815,
186
+ "train_runtime": 2568.179,
187
+ "train_samples_per_second": 1.365,
188
  "train_steps_per_second": 0.042
189
  }
190
  ],
191
  "logging_steps": 10,
192
+ "max_steps": 109,
193
+ "num_train_epochs": 1,
194
  "save_steps": 500,
195
  "total_flos": 0.0,
196
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe8f0c39b850ed7fd27d2d9d44c44150886dfb15af5e347d3620975ca4e1e90a
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ade3ff1207bb0aca50542a9f6fec0fb54178c6bb14556c5d0f48ce4a8b973c2
3
  size 4728