Training in progress, epoch 1
Browse files- README.md +11 -11
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- all_results.json +15 -15
- eval_results.json +11 -11
- train_results.json +4 -4
- trainer_state.json +451 -451
- training_args.bin +1 -1
README.md
CHANGED
@@ -18,15 +18,15 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 0.
|
22 |
-
- Rewards/chosen: 0.
|
23 |
-
- Rewards/rejected: -0.
|
24 |
-
- Rewards/accuracies: 0.
|
25 |
-
- Rewards/margins: 0.
|
26 |
-
- Logps/rejected: -
|
27 |
-
- Logps/chosen: -271.
|
28 |
-
- Logits/rejected: -3.
|
29 |
-
- Logits/chosen: -3.
|
30 |
|
31 |
## Model description
|
32 |
|
@@ -62,12 +62,12 @@ The following hyperparameters were used during training:
|
|
62 |
|
63 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
64 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
65 |
-
| 0.
|
66 |
|
67 |
|
68 |
### Framework versions
|
69 |
|
70 |
- Transformers 4.35.0
|
71 |
-
- Pytorch 2.1.
|
72 |
- Datasets 2.14.6
|
73 |
- Tokenizers 0.14.1
|
|
|
18 |
|
19 |
This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the HuggingFaceH4/ultrafeedback_binarized dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.6488
|
22 |
+
- Rewards/chosen: 0.0341
|
23 |
+
- Rewards/rejected: -0.0820
|
24 |
+
- Rewards/accuracies: 0.7109
|
25 |
+
- Rewards/margins: 0.1161
|
26 |
+
- Logps/rejected: -224.8079
|
27 |
+
- Logps/chosen: -271.6428
|
28 |
+
- Logits/rejected: -3.0562
|
29 |
+
- Logits/chosen: -3.0761
|
30 |
|
31 |
## Model description
|
32 |
|
|
|
62 |
|
63 |
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
64 |
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
65 |
+
| 0.6488 | 1.0 | 485 | 0.6488 | 0.0341 | -0.0820 | 0.7109 | 0.1161 | -224.8079 | -271.6428 | -3.0562 | -3.0761 |
|
66 |
|
67 |
|
68 |
### Framework versions
|
69 |
|
70 |
- Transformers 4.35.0
|
71 |
+
- Pytorch 2.1.0+cu121
|
72 |
- Datasets 2.14.6
|
73 |
- Tokenizers 0.14.1
|
adapter_config.json
CHANGED
@@ -16,10 +16,10 @@
|
|
16 |
"rank_pattern": {},
|
17 |
"revision": null,
|
18 |
"target_modules": [
|
19 |
-
"v_proj",
|
20 |
-
"o_proj",
|
21 |
"q_proj",
|
22 |
-
"k_proj"
|
|
|
|
|
23 |
],
|
24 |
"task_type": "CAUSAL_LM"
|
25 |
}
|
|
|
16 |
"rank_pattern": {},
|
17 |
"revision": null,
|
18 |
"target_modules": [
|
|
|
|
|
19 |
"q_proj",
|
20 |
+
"k_proj",
|
21 |
+
"v_proj",
|
22 |
+
"o_proj"
|
23 |
],
|
24 |
"task_type": "CAUSAL_LM"
|
25 |
}
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 109086672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f4b73195a43a92625562cb4c520ddd964103582be6f8b732cf7b60aff8d31b2
|
3 |
size 109086672
|
all_results.json
CHANGED
@@ -1,21 +1,21 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"eval_logits/chosen": -3.
|
4 |
-
"eval_logits/rejected": -3.
|
5 |
-
"eval_logps/chosen": -271.
|
6 |
-
"eval_logps/rejected": -
|
7 |
-
"eval_loss": 0.
|
8 |
-
"eval_rewards/accuracies": 0.
|
9 |
-
"eval_rewards/chosen": 0.
|
10 |
-
"eval_rewards/margins": 0.
|
11 |
-
"eval_rewards/rejected": -0.
|
12 |
-
"eval_runtime": 254.
|
13 |
"eval_samples": 2000,
|
14 |
-
"eval_samples_per_second": 7.
|
15 |
"eval_steps_per_second": 0.063,
|
16 |
-
"train_loss": 0.
|
17 |
-
"train_runtime":
|
18 |
-
"train_samples":
|
19 |
-
"train_samples_per_second": 3.
|
20 |
"train_steps_per_second": 0.031
|
21 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"eval_logits/chosen": -3.07612943649292,
|
4 |
+
"eval_logits/rejected": -3.056239604949951,
|
5 |
+
"eval_logps/chosen": -271.6427917480469,
|
6 |
+
"eval_logps/rejected": -224.8079376220703,
|
7 |
+
"eval_loss": 0.6488261818885803,
|
8 |
+
"eval_rewards/accuracies": 0.7109375,
|
9 |
+
"eval_rewards/chosen": 0.034067459404468536,
|
10 |
+
"eval_rewards/margins": 0.11610361933708191,
|
11 |
+
"eval_rewards/rejected": -0.08203616738319397,
|
12 |
+
"eval_runtime": 254.1478,
|
13 |
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 7.869,
|
15 |
"eval_steps_per_second": 0.063,
|
16 |
+
"train_loss": 0.6667533972828659,
|
17 |
+
"train_runtime": 15505.6746,
|
18 |
+
"train_samples": 61966,
|
19 |
+
"train_samples_per_second": 3.996,
|
20 |
"train_steps_per_second": 0.031
|
21 |
}
|
eval_results.json
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"eval_logits/chosen": -3.
|
4 |
-
"eval_logits/rejected": -3.
|
5 |
-
"eval_logps/chosen": -271.
|
6 |
-
"eval_logps/rejected": -
|
7 |
-
"eval_loss": 0.
|
8 |
-
"eval_rewards/accuracies": 0.
|
9 |
-
"eval_rewards/chosen": 0.
|
10 |
-
"eval_rewards/margins": 0.
|
11 |
-
"eval_rewards/rejected": -0.
|
12 |
-
"eval_runtime": 254.
|
13 |
"eval_samples": 2000,
|
14 |
-
"eval_samples_per_second": 7.
|
15 |
"eval_steps_per_second": 0.063
|
16 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"eval_logits/chosen": -3.07612943649292,
|
4 |
+
"eval_logits/rejected": -3.056239604949951,
|
5 |
+
"eval_logps/chosen": -271.6427917480469,
|
6 |
+
"eval_logps/rejected": -224.8079376220703,
|
7 |
+
"eval_loss": 0.6488261818885803,
|
8 |
+
"eval_rewards/accuracies": 0.7109375,
|
9 |
+
"eval_rewards/chosen": 0.034067459404468536,
|
10 |
+
"eval_rewards/margins": 0.11610361933708191,
|
11 |
+
"eval_rewards/rejected": -0.08203616738319397,
|
12 |
+
"eval_runtime": 254.1478,
|
13 |
"eval_samples": 2000,
|
14 |
+
"eval_samples_per_second": 7.869,
|
15 |
"eval_steps_per_second": 0.063
|
16 |
}
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
-
"train_samples":
|
6 |
-
"train_samples_per_second": 3.
|
7 |
"train_steps_per_second": 0.031
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.6667533972828659,
|
4 |
+
"train_runtime": 15505.6746,
|
5 |
+
"train_samples": 61966,
|
6 |
+
"train_samples_per_second": 3.996,
|
7 |
"train_steps_per_second": 0.031
|
8 |
}
|
trainer_state.json
CHANGED
@@ -11,10 +11,10 @@
|
|
11 |
{
|
12 |
"epoch": 0.0,
|
13 |
"learning_rate": 1.020408163265306e-08,
|
14 |
-
"logits/chosen": -
|
15 |
-
"logits/rejected": -
|
16 |
-
"logps/chosen": -
|
17 |
-
"logps/rejected": -
|
18 |
"loss": 0.6931,
|
19 |
"rewards/accuracies": 0.0,
|
20 |
"rewards/chosen": 0.0,
|
@@ -25,698 +25,698 @@
|
|
25 |
{
|
26 |
"epoch": 0.02,
|
27 |
"learning_rate": 1.0204081632653061e-07,
|
28 |
-
"logits/chosen": -
|
29 |
-
"logits/rejected": -3.
|
30 |
-
"logps/chosen": -
|
31 |
-
"logps/rejected": -
|
32 |
-
"loss": 0.
|
33 |
-
"rewards/accuracies": 0.
|
34 |
-
"rewards/chosen": -0.
|
35 |
-
"rewards/margins":
|
36 |
-
"rewards/rejected": -0.
|
37 |
"step": 10
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.04,
|
41 |
"learning_rate": 2.0408163265306121e-07,
|
42 |
-
"logits/chosen": -
|
43 |
-
"logits/rejected": -
|
44 |
-
"logps/chosen": -
|
45 |
-
"logps/rejected": -
|
46 |
-
"loss": 0.
|
47 |
-
"rewards/accuracies": 0.
|
48 |
-
"rewards/chosen": 0.
|
49 |
-
"rewards/margins": 0.
|
50 |
-
"rewards/rejected": -0.
|
51 |
"step": 20
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.06,
|
55 |
"learning_rate": 3.0612244897959183e-07,
|
56 |
-
"logits/chosen": -
|
57 |
-
"logits/rejected": -
|
58 |
-
"logps/chosen": -
|
59 |
-
"logps/rejected": -
|
60 |
-
"loss": 0.
|
61 |
-
"rewards/accuracies": 0.
|
62 |
-
"rewards/chosen":
|
63 |
-
"rewards/margins": 0.
|
64 |
-
"rewards/rejected":
|
65 |
"step": 30
|
66 |
},
|
67 |
{
|
68 |
"epoch": 0.08,
|
69 |
"learning_rate": 4.0816326530612243e-07,
|
70 |
-
"logits/chosen": -3.
|
71 |
-
"logits/rejected": -3.
|
72 |
-
"logps/chosen": -
|
73 |
-
"logps/rejected": -
|
74 |
-
"loss": 0.
|
75 |
-
"rewards/accuracies": 0.
|
76 |
-
"rewards/chosen": 0.
|
77 |
-
"rewards/margins": 0.
|
78 |
-
"rewards/rejected": -0.
|
79 |
"step": 40
|
80 |
},
|
81 |
{
|
82 |
"epoch": 0.1,
|
83 |
"learning_rate": 4.988532110091743e-07,
|
84 |
-
"logits/chosen": -3.
|
85 |
-
"logits/rejected": -3.
|
86 |
-
"logps/chosen": -
|
87 |
-
"logps/rejected": -
|
88 |
-
"loss": 0.
|
89 |
-
"rewards/accuracies": 0.
|
90 |
-
"rewards/chosen": 0.
|
91 |
-
"rewards/margins": 0.
|
92 |
-
"rewards/rejected": -0.
|
93 |
"step": 50
|
94 |
},
|
95 |
{
|
96 |
"epoch": 0.12,
|
97 |
"learning_rate": 4.873853211009174e-07,
|
98 |
-
"logits/chosen": -3.
|
99 |
-
"logits/rejected": -3.
|
100 |
-
"logps/chosen": -
|
101 |
-
"logps/rejected": -
|
102 |
-
"loss": 0.
|
103 |
-
"rewards/accuracies": 0.
|
104 |
-
"rewards/chosen": 0.
|
105 |
-
"rewards/margins": 0.
|
106 |
-
"rewards/rejected": -0.
|
107 |
"step": 60
|
108 |
},
|
109 |
{
|
110 |
"epoch": 0.14,
|
111 |
"learning_rate": 4.7591743119266054e-07,
|
112 |
-
"logits/chosen": -3.
|
113 |
-
"logits/rejected": -3.
|
114 |
-
"logps/chosen": -
|
115 |
-
"logps/rejected": -
|
116 |
-
"loss": 0.
|
117 |
-
"rewards/accuracies": 0.
|
118 |
-
"rewards/chosen": 0.
|
119 |
-
"rewards/margins": 0.
|
120 |
-
"rewards/rejected": -0.
|
121 |
"step": 70
|
122 |
},
|
123 |
{
|
124 |
"epoch": 0.16,
|
125 |
"learning_rate": 4.644495412844037e-07,
|
126 |
-
"logits/chosen": -3.
|
127 |
-
"logits/rejected": -
|
128 |
-
"logps/chosen": -
|
129 |
-
"logps/rejected": -
|
130 |
-
"loss": 0.
|
131 |
-
"rewards/accuracies": 0.
|
132 |
-
"rewards/chosen": 0.
|
133 |
-
"rewards/margins": 0.
|
134 |
-
"rewards/rejected": -0.
|
135 |
"step": 80
|
136 |
},
|
137 |
{
|
138 |
"epoch": 0.19,
|
139 |
"learning_rate": 4.5298165137614677e-07,
|
140 |
-
"logits/chosen": -3.
|
141 |
-
"logits/rejected": -3.
|
142 |
-
"logps/chosen": -
|
143 |
-
"logps/rejected": -
|
144 |
-
"loss": 0.
|
145 |
-
"rewards/accuracies": 0.
|
146 |
-
"rewards/chosen": 0.
|
147 |
-
"rewards/margins": 0.
|
148 |
-
"rewards/rejected": -0.
|
149 |
"step": 90
|
150 |
},
|
151 |
{
|
152 |
"epoch": 0.21,
|
153 |
"learning_rate": 4.4151376146788986e-07,
|
154 |
-
"logits/chosen": -3.
|
155 |
-
"logits/rejected": -3.
|
156 |
-
"logps/chosen": -
|
157 |
-
"logps/rejected": -
|
158 |
-
"loss": 0.
|
159 |
-
"rewards/accuracies": 0.
|
160 |
-
"rewards/chosen": 0.
|
161 |
-
"rewards/margins": 0.
|
162 |
-
"rewards/rejected": -0.
|
163 |
"step": 100
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.23,
|
167 |
"learning_rate": 4.30045871559633e-07,
|
168 |
-
"logits/chosen": -3.
|
169 |
-
"logits/rejected": -
|
170 |
-
"logps/chosen": -
|
171 |
-
"logps/rejected": -
|
172 |
-
"loss": 0.
|
173 |
-
"rewards/accuracies": 0.
|
174 |
-
"rewards/chosen": 0.
|
175 |
-
"rewards/margins": 0.
|
176 |
-
"rewards/rejected": -0.
|
177 |
"step": 110
|
178 |
},
|
179 |
{
|
180 |
"epoch": 0.25,
|
181 |
"learning_rate": 4.1857798165137613e-07,
|
182 |
-
"logits/chosen": -3.
|
183 |
-
"logits/rejected": -3.
|
184 |
-
"logps/chosen": -
|
185 |
-
"logps/rejected": -
|
186 |
-
"loss": 0.
|
187 |
-
"rewards/accuracies": 0.
|
188 |
-
"rewards/chosen": 0.
|
189 |
-
"rewards/margins": 0.
|
190 |
-
"rewards/rejected": -0.
|
191 |
"step": 120
|
192 |
},
|
193 |
{
|
194 |
"epoch": 0.27,
|
195 |
"learning_rate": 4.071100917431192e-07,
|
196 |
-
"logits/chosen": -
|
197 |
-
"logits/rejected": -
|
198 |
-
"logps/chosen": -
|
199 |
-
"logps/rejected": -
|
200 |
-
"loss": 0.
|
201 |
-
"rewards/accuracies": 0.
|
202 |
-
"rewards/chosen": 0.
|
203 |
-
"rewards/margins": 0.
|
204 |
-
"rewards/rejected": -0.
|
205 |
"step": 130
|
206 |
},
|
207 |
{
|
208 |
"epoch": 0.29,
|
209 |
"learning_rate": 3.9564220183486236e-07,
|
210 |
-
"logits/chosen": -3.
|
211 |
-
"logits/rejected": -3.
|
212 |
-
"logps/chosen": -
|
213 |
-
"logps/rejected": -
|
214 |
-
"loss": 0.
|
215 |
-
"rewards/accuracies": 0.
|
216 |
-
"rewards/chosen": 0.
|
217 |
-
"rewards/margins": 0.
|
218 |
-
"rewards/rejected": -0.
|
219 |
"step": 140
|
220 |
},
|
221 |
{
|
222 |
"epoch": 0.31,
|
223 |
"learning_rate": 3.841743119266055e-07,
|
224 |
-
"logits/chosen": -3.
|
225 |
-
"logits/rejected": -
|
226 |
-
"logps/chosen": -
|
227 |
-
"logps/rejected": -
|
228 |
-
"loss": 0.
|
229 |
-
"rewards/accuracies": 0.
|
230 |
-
"rewards/chosen": 0.
|
231 |
-
"rewards/margins": 0.
|
232 |
-
"rewards/rejected": -0.
|
233 |
"step": 150
|
234 |
},
|
235 |
{
|
236 |
"epoch": 0.33,
|
237 |
"learning_rate": 3.7270642201834864e-07,
|
238 |
-
"logits/chosen": -3.
|
239 |
-
"logits/rejected": -3.
|
240 |
-
"logps/chosen": -
|
241 |
-
"logps/rejected": -
|
242 |
-
"loss": 0.
|
243 |
-
"rewards/accuracies": 0.
|
244 |
-
"rewards/chosen": 0.
|
245 |
-
"rewards/margins": 0.
|
246 |
-
"rewards/rejected": -0.
|
247 |
"step": 160
|
248 |
},
|
249 |
{
|
250 |
"epoch": 0.35,
|
251 |
"learning_rate": 3.612385321100918e-07,
|
252 |
-
"logits/chosen": -3.
|
253 |
-
"logits/rejected": -3.
|
254 |
-
"logps/chosen": -
|
255 |
-
"logps/rejected": -
|
256 |
-
"loss": 0.
|
257 |
-
"rewards/accuracies": 0.
|
258 |
-
"rewards/chosen": 0.
|
259 |
-
"rewards/margins": 0.
|
260 |
-
"rewards/rejected": -0.
|
261 |
"step": 170
|
262 |
},
|
263 |
{
|
264 |
"epoch": 0.37,
|
265 |
"learning_rate": 3.497706422018348e-07,
|
266 |
-
"logits/chosen": -
|
267 |
-
"logits/rejected": -3.
|
268 |
-
"logps/chosen": -
|
269 |
-
"logps/rejected": -
|
270 |
-
"loss": 0.
|
271 |
-
"rewards/accuracies": 0.
|
272 |
-
"rewards/chosen": 0.
|
273 |
-
"rewards/margins": 0.
|
274 |
-
"rewards/rejected": -0.
|
275 |
"step": 180
|
276 |
},
|
277 |
{
|
278 |
"epoch": 0.39,
|
279 |
"learning_rate": 3.3830275229357795e-07,
|
280 |
-
"logits/chosen": -3.
|
281 |
-
"logits/rejected": -3.
|
282 |
-
"logps/chosen": -
|
283 |
-
"logps/rejected": -
|
284 |
-
"loss": 0.
|
285 |
-
"rewards/accuracies": 0.
|
286 |
-
"rewards/chosen": 0.
|
287 |
-
"rewards/margins": 0.
|
288 |
-
"rewards/rejected": -0.
|
289 |
"step": 190
|
290 |
},
|
291 |
{
|
292 |
"epoch": 0.41,
|
293 |
"learning_rate": 3.268348623853211e-07,
|
294 |
-
"logits/chosen": -3.
|
295 |
-
"logits/rejected": -3.
|
296 |
-
"logps/chosen": -
|
297 |
-
"logps/rejected": -
|
298 |
-
"loss": 0.
|
299 |
-
"rewards/accuracies": 0.
|
300 |
-
"rewards/chosen": 0.
|
301 |
-
"rewards/margins": 0.
|
302 |
-
"rewards/rejected": -0.
|
303 |
"step": 200
|
304 |
},
|
305 |
{
|
306 |
"epoch": 0.43,
|
307 |
"learning_rate": 3.1536697247706423e-07,
|
308 |
-
"logits/chosen": -3.
|
309 |
-
"logits/rejected": -3.
|
310 |
-
"logps/chosen": -
|
311 |
-
"logps/rejected": -
|
312 |
-
"loss": 0.
|
313 |
-
"rewards/accuracies": 0.
|
314 |
-
"rewards/chosen": 0.
|
315 |
-
"rewards/margins": 0.
|
316 |
-
"rewards/rejected": -0.
|
317 |
"step": 210
|
318 |
},
|
319 |
{
|
320 |
"epoch": 0.45,
|
321 |
"learning_rate": 3.038990825688073e-07,
|
322 |
-
"logits/chosen": -3.
|
323 |
-
"logits/rejected": -3.
|
324 |
-
"logps/chosen": -
|
325 |
-
"logps/rejected": -
|
326 |
-
"loss": 0.
|
327 |
-
"rewards/accuracies": 0.
|
328 |
-
"rewards/chosen": 0.
|
329 |
-
"rewards/margins": 0.
|
330 |
-
"rewards/rejected": -0.
|
331 |
"step": 220
|
332 |
},
|
333 |
{
|
334 |
"epoch": 0.47,
|
335 |
"learning_rate": 2.9243119266055045e-07,
|
336 |
-
"logits/chosen": -
|
337 |
-
"logits/rejected": -
|
338 |
-
"logps/chosen": -
|
339 |
-
"logps/rejected": -
|
340 |
-
"loss": 0.
|
341 |
-
"rewards/accuracies": 0.
|
342 |
-
"rewards/chosen": 0.
|
343 |
-
"rewards/margins": 0.
|
344 |
-
"rewards/rejected": -0.
|
345 |
"step": 230
|
346 |
},
|
347 |
{
|
348 |
"epoch": 0.49,
|
349 |
"learning_rate": 2.809633027522936e-07,
|
350 |
-
"logits/chosen": -3.
|
351 |
-
"logits/rejected": -3.
|
352 |
-
"logps/chosen": -
|
353 |
-
"logps/rejected": -
|
354 |
-
"loss": 0.
|
355 |
-
"rewards/accuracies": 0.
|
356 |
-
"rewards/chosen": 0.
|
357 |
-
"rewards/margins": 0.
|
358 |
-
"rewards/rejected": -0.
|
359 |
"step": 240
|
360 |
},
|
361 |
{
|
362 |
"epoch": 0.52,
|
363 |
"learning_rate": 2.6949541284403673e-07,
|
364 |
-
"logits/chosen": -3.
|
365 |
-
"logits/rejected": -3.
|
366 |
-
"logps/chosen": -
|
367 |
-
"logps/rejected": -
|
368 |
-
"loss": 0.
|
369 |
-
"rewards/accuracies": 0.
|
370 |
-
"rewards/chosen": 0.
|
371 |
-
"rewards/margins": 0.
|
372 |
-
"rewards/rejected": -0.
|
373 |
"step": 250
|
374 |
},
|
375 |
{
|
376 |
"epoch": 0.54,
|
377 |
"learning_rate": 2.5802752293577976e-07,
|
378 |
-
"logits/chosen": -
|
379 |
-
"logits/rejected": -2.
|
380 |
-
"logps/chosen": -
|
381 |
-
"logps/rejected": -
|
382 |
-
"loss": 0.
|
383 |
-
"rewards/accuracies": 0.
|
384 |
-
"rewards/chosen": 0.
|
385 |
-
"rewards/margins": 0.
|
386 |
-
"rewards/rejected": -0.
|
387 |
"step": 260
|
388 |
},
|
389 |
{
|
390 |
"epoch": 0.56,
|
391 |
"learning_rate": 2.465596330275229e-07,
|
392 |
-
"logits/chosen": -3.
|
393 |
-
"logits/rejected": -3.
|
394 |
-
"logps/chosen": -
|
395 |
-
"logps/rejected": -
|
396 |
-
"loss": 0.
|
397 |
-
"rewards/accuracies": 0.
|
398 |
-
"rewards/chosen": 0.
|
399 |
-
"rewards/margins": 0.
|
400 |
-
"rewards/rejected": -0.
|
401 |
"step": 270
|
402 |
},
|
403 |
{
|
404 |
"epoch": 0.58,
|
405 |
"learning_rate": 2.3509174311926604e-07,
|
406 |
-
"logits/chosen": -3.
|
407 |
-
"logits/rejected": -
|
408 |
-
"logps/chosen": -
|
409 |
-
"logps/rejected": -
|
410 |
-
"loss": 0.
|
411 |
-
"rewards/accuracies": 0.
|
412 |
-
"rewards/chosen": 0.
|
413 |
-
"rewards/margins": 0.
|
414 |
-
"rewards/rejected": -0.
|
415 |
"step": 280
|
416 |
},
|
417 |
{
|
418 |
"epoch": 0.6,
|
419 |
"learning_rate": 2.2362385321100916e-07,
|
420 |
-
"logits/chosen": -3.
|
421 |
-
"logits/rejected": -3.
|
422 |
-
"logps/chosen": -
|
423 |
-
"logps/rejected": -
|
424 |
-
"loss": 0.
|
425 |
-
"rewards/accuracies": 0.
|
426 |
-
"rewards/chosen": 0.
|
427 |
-
"rewards/margins": 0.
|
428 |
-
"rewards/rejected": -0.
|
429 |
"step": 290
|
430 |
},
|
431 |
{
|
432 |
"epoch": 0.62,
|
433 |
"learning_rate": 2.121559633027523e-07,
|
434 |
-
"logits/chosen": -
|
435 |
-
"logits/rejected": -
|
436 |
-
"logps/chosen": -
|
437 |
-
"logps/rejected": -
|
438 |
-
"loss": 0.
|
439 |
-
"rewards/accuracies": 0.
|
440 |
-
"rewards/chosen": 0.
|
441 |
-
"rewards/margins": 0.
|
442 |
-
"rewards/rejected": -0.
|
443 |
"step": 300
|
444 |
},
|
445 |
{
|
446 |
"epoch": 0.64,
|
447 |
"learning_rate": 2.0068807339449538e-07,
|
448 |
-
"logits/chosen": -3.
|
449 |
-
"logits/rejected": -3.
|
450 |
-
"logps/chosen": -
|
451 |
-
"logps/rejected": -
|
452 |
-
"loss": 0.
|
453 |
-
"rewards/accuracies": 0.
|
454 |
-
"rewards/chosen": 0.
|
455 |
-
"rewards/margins": 0.
|
456 |
-
"rewards/rejected": -0.
|
457 |
"step": 310
|
458 |
},
|
459 |
{
|
460 |
"epoch": 0.66,
|
461 |
"learning_rate": 1.8922018348623852e-07,
|
462 |
-
"logits/chosen": -3.
|
463 |
-
"logits/rejected": -3.
|
464 |
-
"logps/chosen": -
|
465 |
-
"logps/rejected": -
|
466 |
-
"loss": 0.
|
467 |
-
"rewards/accuracies": 0.
|
468 |
-
"rewards/chosen": 0.
|
469 |
-
"rewards/margins": 0.
|
470 |
-
"rewards/rejected": -0.
|
471 |
"step": 320
|
472 |
},
|
473 |
{
|
474 |
"epoch": 0.68,
|
475 |
"learning_rate": 1.7775229357798163e-07,
|
476 |
-
"logits/chosen": -3.
|
477 |
-
"logits/rejected": -3.
|
478 |
-
"logps/chosen": -
|
479 |
-
"logps/rejected": -
|
480 |
-
"loss": 0.
|
481 |
-
"rewards/accuracies": 0.
|
482 |
-
"rewards/chosen": 0.
|
483 |
-
"rewards/margins": 0.
|
484 |
-
"rewards/rejected": -0.
|
485 |
"step": 330
|
486 |
},
|
487 |
{
|
488 |
"epoch": 0.7,
|
489 |
"learning_rate": 1.6628440366972477e-07,
|
490 |
-
"logits/chosen": -3.
|
491 |
-
"logits/rejected": -3.
|
492 |
-
"logps/chosen": -
|
493 |
-
"logps/rejected": -
|
494 |
-
"loss": 0.
|
495 |
-
"rewards/accuracies": 0.
|
496 |
-
"rewards/chosen": 0.
|
497 |
-
"rewards/margins": 0.
|
498 |
-
"rewards/rejected": -0.
|
499 |
"step": 340
|
500 |
},
|
501 |
{
|
502 |
"epoch": 0.72,
|
503 |
"learning_rate": 1.5481651376146786e-07,
|
504 |
-
"logits/chosen": -3.
|
505 |
-
"logits/rejected": -3.
|
506 |
-
"logps/chosen": -
|
507 |
-
"logps/rejected": -
|
508 |
-
"loss": 0.
|
509 |
-
"rewards/accuracies": 0.
|
510 |
-
"rewards/chosen": 0.
|
511 |
-
"rewards/margins": 0.
|
512 |
-
"rewards/rejected": -0.
|
513 |
"step": 350
|
514 |
},
|
515 |
{
|
516 |
"epoch": 0.74,
|
517 |
"learning_rate": 1.43348623853211e-07,
|
518 |
-
"logits/chosen": -
|
519 |
-
"logits/rejected": -
|
520 |
-
"logps/chosen": -
|
521 |
-
"logps/rejected": -
|
522 |
-
"loss": 0.
|
523 |
-
"rewards/accuracies": 0.
|
524 |
-
"rewards/chosen": 0.
|
525 |
-
"rewards/margins": 0.
|
526 |
-
"rewards/rejected": -0.
|
527 |
"step": 360
|
528 |
},
|
529 |
{
|
530 |
"epoch": 0.76,
|
531 |
"learning_rate": 1.318807339449541e-07,
|
532 |
-
"logits/chosen": -
|
533 |
-
"logits/rejected": -
|
534 |
-
"logps/chosen": -
|
535 |
-
"logps/rejected": -
|
536 |
-
"loss": 0.
|
537 |
-
"rewards/accuracies": 0.
|
538 |
-
"rewards/chosen": 0.
|
539 |
-
"rewards/margins": 0.
|
540 |
-
"rewards/rejected": -0.
|
541 |
"step": 370
|
542 |
},
|
543 |
{
|
544 |
"epoch": 0.78,
|
545 |
"learning_rate": 1.2041284403669725e-07,
|
546 |
-
"logits/chosen": -3.
|
547 |
-
"logits/rejected": -3.
|
548 |
-
"logps/chosen": -
|
549 |
-
"logps/rejected": -
|
550 |
-
"loss": 0.
|
551 |
-
"rewards/accuracies": 0.
|
552 |
-
"rewards/chosen": 0.
|
553 |
-
"rewards/margins": 0.
|
554 |
-
"rewards/rejected": -0.
|
555 |
"step": 380
|
556 |
},
|
557 |
{
|
558 |
"epoch": 0.8,
|
559 |
"learning_rate": 1.0894495412844036e-07,
|
560 |
-
"logits/chosen": -
|
561 |
-
"logits/rejected": -3.
|
562 |
-
"logps/chosen": -
|
563 |
-
"logps/rejected": -
|
564 |
-
"loss": 0.
|
565 |
-
"rewards/accuracies": 0.
|
566 |
-
"rewards/chosen": 0.
|
567 |
-
"rewards/margins": 0.
|
568 |
-
"rewards/rejected": -0.
|
569 |
"step": 390
|
570 |
},
|
571 |
{
|
572 |
"epoch": 0.82,
|
573 |
"learning_rate": 9.747706422018348e-08,
|
574 |
-
"logits/chosen": -3.
|
575 |
-
"logits/rejected": -3.
|
576 |
-
"logps/chosen": -
|
577 |
-
"logps/rejected": -
|
578 |
-
"loss": 0.
|
579 |
-
"rewards/accuracies": 0.
|
580 |
-
"rewards/chosen": 0.
|
581 |
-
"rewards/margins": 0.
|
582 |
-
"rewards/rejected": -0.
|
583 |
"step": 400
|
584 |
},
|
585 |
{
|
586 |
"epoch": 0.85,
|
587 |
"learning_rate": 8.60091743119266e-08,
|
588 |
-
"logits/chosen": -3.
|
589 |
-
"logits/rejected": -3.
|
590 |
-
"logps/chosen": -
|
591 |
-
"logps/rejected": -
|
592 |
-
"loss": 0.
|
593 |
-
"rewards/accuracies": 0.
|
594 |
-
"rewards/chosen": 0.
|
595 |
-
"rewards/margins": 0.
|
596 |
-
"rewards/rejected": -0.
|
597 |
"step": 410
|
598 |
},
|
599 |
{
|
600 |
"epoch": 0.87,
|
601 |
"learning_rate": 7.454128440366971e-08,
|
602 |
-
"logits/chosen": -3.
|
603 |
-
"logits/rejected": -3.
|
604 |
-
"logps/chosen": -
|
605 |
-
"logps/rejected": -
|
606 |
-
"loss": 0.
|
607 |
-
"rewards/accuracies": 0.
|
608 |
-
"rewards/chosen": 0.
|
609 |
-
"rewards/margins": 0.
|
610 |
-
"rewards/rejected": -0.
|
611 |
"step": 420
|
612 |
},
|
613 |
{
|
614 |
"epoch": 0.89,
|
615 |
"learning_rate": 6.307339449541284e-08,
|
616 |
-
"logits/chosen": -3.
|
617 |
-
"logits/rejected": -3.
|
618 |
-
"logps/chosen": -
|
619 |
-
"logps/rejected": -
|
620 |
-
"loss": 0.
|
621 |
-
"rewards/accuracies": 0.
|
622 |
-
"rewards/chosen": 0.
|
623 |
-
"rewards/margins": 0.
|
624 |
-
"rewards/rejected": -0.
|
625 |
"step": 430
|
626 |
},
|
627 |
{
|
628 |
"epoch": 0.91,
|
629 |
"learning_rate": 5.1605504587155966e-08,
|
630 |
-
"logits/chosen": -3.
|
631 |
-
"logits/rejected": -3.
|
632 |
-
"logps/chosen": -
|
633 |
-
"logps/rejected": -
|
634 |
-
"loss": 0.
|
635 |
-
"rewards/accuracies": 0.
|
636 |
-
"rewards/chosen": 0.
|
637 |
-
"rewards/margins": 0.
|
638 |
-
"rewards/rejected": -0.
|
639 |
"step": 440
|
640 |
},
|
641 |
{
|
642 |
"epoch": 0.93,
|
643 |
"learning_rate": 4.0137614678899086e-08,
|
644 |
-
"logits/chosen": -3.
|
645 |
-
"logits/rejected": -
|
646 |
-
"logps/chosen": -
|
647 |
-
"logps/rejected": -
|
648 |
-
"loss": 0.
|
649 |
-
"rewards/accuracies": 0.
|
650 |
-
"rewards/chosen": 0.
|
651 |
-
"rewards/margins": 0.
|
652 |
-
"rewards/rejected": -0.
|
653 |
"step": 450
|
654 |
},
|
655 |
{
|
656 |
"epoch": 0.95,
|
657 |
"learning_rate": 2.86697247706422e-08,
|
658 |
-
"logits/chosen": -3.
|
659 |
-
"logits/rejected": -
|
660 |
-
"logps/chosen": -
|
661 |
-
"logps/rejected": -
|
662 |
-
"loss": 0.
|
663 |
-
"rewards/accuracies": 0.
|
664 |
-
"rewards/chosen": 0.
|
665 |
-
"rewards/margins": 0.
|
666 |
-
"rewards/rejected": -0.
|
667 |
"step": 460
|
668 |
},
|
669 |
{
|
670 |
"epoch": 0.97,
|
671 |
"learning_rate": 1.720183486238532e-08,
|
672 |
-
"logits/chosen": -3.
|
673 |
-
"logits/rejected": -3.
|
674 |
-
"logps/chosen": -
|
675 |
-
"logps/rejected": -
|
676 |
-
"loss": 0.
|
677 |
-
"rewards/accuracies": 0.
|
678 |
-
"rewards/chosen": 0.
|
679 |
-
"rewards/margins": 0.
|
680 |
-
"rewards/rejected": -0.
|
681 |
"step": 470
|
682 |
},
|
683 |
{
|
684 |
"epoch": 0.99,
|
685 |
"learning_rate": 5.73394495412844e-09,
|
686 |
-
"logits/chosen": -3.
|
687 |
-
"logits/rejected": -
|
688 |
-
"logps/chosen": -
|
689 |
-
"logps/rejected": -
|
690 |
-
"loss": 0.
|
691 |
-
"rewards/accuracies": 0.
|
692 |
-
"rewards/chosen": 0.
|
693 |
-
"rewards/margins": 0.
|
694 |
-
"rewards/rejected": -0.
|
695 |
"step": 480
|
696 |
},
|
697 |
{
|
698 |
"epoch": 1.0,
|
699 |
-
"eval_logits/chosen": -3.
|
700 |
-
"eval_logits/rejected": -3.
|
701 |
-
"eval_logps/chosen": -271.
|
702 |
-
"eval_logps/rejected": -
|
703 |
-
"eval_loss": 0.
|
704 |
-
"eval_rewards/accuracies": 0.
|
705 |
-
"eval_rewards/chosen": 0.
|
706 |
-
"eval_rewards/margins": 0.
|
707 |
-
"eval_rewards/rejected": -0.
|
708 |
-
"eval_runtime":
|
709 |
-
"eval_samples_per_second": 7.
|
710 |
-
"eval_steps_per_second": 0.
|
711 |
"step": 485
|
712 |
},
|
713 |
{
|
714 |
"epoch": 1.0,
|
715 |
"step": 485,
|
716 |
"total_flos": 0.0,
|
717 |
-
"train_loss": 0.
|
718 |
-
"train_runtime":
|
719 |
-
"train_samples_per_second": 3.
|
720 |
"train_steps_per_second": 0.031
|
721 |
}
|
722 |
],
|
|
|
11 |
{
|
12 |
"epoch": 0.0,
|
13 |
"learning_rate": 1.020408163265306e-08,
|
14 |
+
"logits/chosen": -2.891636610031128,
|
15 |
+
"logits/rejected": -2.8851490020751953,
|
16 |
+
"logps/chosen": -135.91143798828125,
|
17 |
+
"logps/rejected": -101.67433166503906,
|
18 |
"loss": 0.6931,
|
19 |
"rewards/accuracies": 0.0,
|
20 |
"rewards/chosen": 0.0,
|
|
|
25 |
{
|
26 |
"epoch": 0.02,
|
27 |
"learning_rate": 1.0204081632653061e-07,
|
28 |
+
"logits/chosen": -2.9867801666259766,
|
29 |
+
"logits/rejected": -3.007345199584961,
|
30 |
+
"logps/chosen": -309.9524230957031,
|
31 |
+
"logps/rejected": -272.5204162597656,
|
32 |
+
"loss": 0.6926,
|
33 |
+
"rewards/accuracies": 0.4444444477558136,
|
34 |
+
"rewards/chosen": -0.00014034591731615365,
|
35 |
+
"rewards/margins": 0.0023386774118989706,
|
36 |
+
"rewards/rejected": -0.002479023300111294,
|
37 |
"step": 10
|
38 |
},
|
39 |
{
|
40 |
"epoch": 0.04,
|
41 |
"learning_rate": 2.0408163265306121e-07,
|
42 |
+
"logits/chosen": -3.0750911235809326,
|
43 |
+
"logits/rejected": -3.0683979988098145,
|
44 |
+
"logps/chosen": -282.82012939453125,
|
45 |
+
"logps/rejected": -249.6508331298828,
|
46 |
+
"loss": 0.6922,
|
47 |
+
"rewards/accuracies": 0.48124998807907104,
|
48 |
+
"rewards/chosen": -0.0003494807460810989,
|
49 |
+
"rewards/margins": 0.0025773285888135433,
|
50 |
+
"rewards/rejected": -0.002926809247583151,
|
51 |
"step": 20
|
52 |
},
|
53 |
{
|
54 |
"epoch": 0.06,
|
55 |
"learning_rate": 3.0612244897959183e-07,
|
56 |
+
"logits/chosen": -3.072047710418701,
|
57 |
+
"logits/rejected": -3.0354340076446533,
|
58 |
+
"logps/chosen": -280.91827392578125,
|
59 |
+
"logps/rejected": -199.9836883544922,
|
60 |
+
"loss": 0.6943,
|
61 |
+
"rewards/accuracies": 0.518750011920929,
|
62 |
+
"rewards/chosen": 0.0013565481640398502,
|
63 |
+
"rewards/margins": -0.0006766369333490729,
|
64 |
+
"rewards/rejected": 0.0020331847481429577,
|
65 |
"step": 30
|
66 |
},
|
67 |
{
|
68 |
"epoch": 0.08,
|
69 |
"learning_rate": 4.0816326530612243e-07,
|
70 |
+
"logits/chosen": -3.0383801460266113,
|
71 |
+
"logits/rejected": -3.035770893096924,
|
72 |
+
"logps/chosen": -290.04510498046875,
|
73 |
+
"logps/rejected": -238.2515106201172,
|
74 |
+
"loss": 0.6927,
|
75 |
+
"rewards/accuracies": 0.4625000059604645,
|
76 |
+
"rewards/chosen": 0.0009484182810410857,
|
77 |
+
"rewards/margins": 0.002096892800182104,
|
78 |
+
"rewards/rejected": -0.001148474169895053,
|
79 |
"step": 40
|
80 |
},
|
81 |
{
|
82 |
"epoch": 0.1,
|
83 |
"learning_rate": 4.988532110091743e-07,
|
84 |
+
"logits/chosen": -3.037513017654419,
|
85 |
+
"logits/rejected": -3.0122437477111816,
|
86 |
+
"logps/chosen": -296.66009521484375,
|
87 |
+
"logps/rejected": -217.6807861328125,
|
88 |
+
"loss": 0.6896,
|
89 |
+
"rewards/accuracies": 0.550000011920929,
|
90 |
+
"rewards/chosen": 0.006595917046070099,
|
91 |
+
"rewards/margins": 0.010886356234550476,
|
92 |
+
"rewards/rejected": -0.004290440119802952,
|
93 |
"step": 50
|
94 |
},
|
95 |
{
|
96 |
"epoch": 0.12,
|
97 |
"learning_rate": 4.873853211009174e-07,
|
98 |
+
"logits/chosen": -3.0581448078155518,
|
99 |
+
"logits/rejected": -3.0058109760284424,
|
100 |
+
"logps/chosen": -297.3258361816406,
|
101 |
+
"logps/rejected": -242.928466796875,
|
102 |
+
"loss": 0.6895,
|
103 |
+
"rewards/accuracies": 0.53125,
|
104 |
+
"rewards/chosen": 0.0021637417376041412,
|
105 |
+
"rewards/margins": 0.0060836682096123695,
|
106 |
+
"rewards/rejected": -0.003919926937669516,
|
107 |
"step": 60
|
108 |
},
|
109 |
{
|
110 |
"epoch": 0.14,
|
111 |
"learning_rate": 4.7591743119266054e-07,
|
112 |
+
"logits/chosen": -3.0567173957824707,
|
113 |
+
"logits/rejected": -3.056859254837036,
|
114 |
+
"logps/chosen": -281.0410461425781,
|
115 |
+
"logps/rejected": -256.200927734375,
|
116 |
+
"loss": 0.6886,
|
117 |
+
"rewards/accuracies": 0.574999988079071,
|
118 |
+
"rewards/chosen": 0.006569950375705957,
|
119 |
+
"rewards/margins": 0.011877616867423058,
|
120 |
+
"rewards/rejected": -0.005307666026055813,
|
121 |
"step": 70
|
122 |
},
|
123 |
{
|
124 |
"epoch": 0.16,
|
125 |
"learning_rate": 4.644495412844037e-07,
|
126 |
+
"logits/chosen": -3.0421910285949707,
|
127 |
+
"logits/rejected": -3.0536134243011475,
|
128 |
+
"logps/chosen": -299.5488586425781,
|
129 |
+
"logps/rejected": -233.7873077392578,
|
130 |
+
"loss": 0.6871,
|
131 |
+
"rewards/accuracies": 0.518750011920929,
|
132 |
+
"rewards/chosen": 0.004993592854589224,
|
133 |
+
"rewards/margins": 0.014339953660964966,
|
134 |
+
"rewards/rejected": -0.00934636127203703,
|
135 |
"step": 80
|
136 |
},
|
137 |
{
|
138 |
"epoch": 0.19,
|
139 |
"learning_rate": 4.5298165137614677e-07,
|
140 |
+
"logits/chosen": -3.029874324798584,
|
141 |
+
"logits/rejected": -3.018759250640869,
|
142 |
+
"logps/chosen": -253.44351196289062,
|
143 |
+
"logps/rejected": -201.76646423339844,
|
144 |
+
"loss": 0.6854,
|
145 |
+
"rewards/accuracies": 0.581250011920929,
|
146 |
+
"rewards/chosen": 0.003981114365160465,
|
147 |
+
"rewards/margins": 0.012469857931137085,
|
148 |
+
"rewards/rejected": -0.008488742634654045,
|
149 |
"step": 90
|
150 |
},
|
151 |
{
|
152 |
"epoch": 0.21,
|
153 |
"learning_rate": 4.4151376146788986e-07,
|
154 |
+
"logits/chosen": -3.0842769145965576,
|
155 |
+
"logits/rejected": -3.086177349090576,
|
156 |
+
"logps/chosen": -258.80426025390625,
|
157 |
+
"logps/rejected": -233.66793823242188,
|
158 |
+
"loss": 0.6863,
|
159 |
+
"rewards/accuracies": 0.6000000238418579,
|
160 |
+
"rewards/chosen": 0.004184984136372805,
|
161 |
+
"rewards/margins": 0.017948109656572342,
|
162 |
+
"rewards/rejected": -0.01376312505453825,
|
163 |
"step": 100
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.23,
|
167 |
"learning_rate": 4.30045871559633e-07,
|
168 |
+
"logits/chosen": -3.050504207611084,
|
169 |
+
"logits/rejected": -3.0392251014709473,
|
170 |
+
"logps/chosen": -275.3699035644531,
|
171 |
+
"logps/rejected": -228.40451049804688,
|
172 |
+
"loss": 0.6809,
|
173 |
+
"rewards/accuracies": 0.637499988079071,
|
174 |
+
"rewards/chosen": 0.007297619245946407,
|
175 |
+
"rewards/margins": 0.02423209697008133,
|
176 |
+
"rewards/rejected": -0.016934476792812347,
|
177 |
"step": 110
|
178 |
},
|
179 |
{
|
180 |
"epoch": 0.25,
|
181 |
"learning_rate": 4.1857798165137613e-07,
|
182 |
+
"logits/chosen": -3.0525004863739014,
|
183 |
+
"logits/rejected": -3.024714946746826,
|
184 |
+
"logps/chosen": -295.898681640625,
|
185 |
+
"logps/rejected": -199.49343872070312,
|
186 |
+
"loss": 0.6804,
|
187 |
+
"rewards/accuracies": 0.5874999761581421,
|
188 |
+
"rewards/chosen": 0.007169491611421108,
|
189 |
+
"rewards/margins": 0.02695578895509243,
|
190 |
+
"rewards/rejected": -0.019786298274993896,
|
191 |
"step": 120
|
192 |
},
|
193 |
{
|
194 |
"epoch": 0.27,
|
195 |
"learning_rate": 4.071100917431192e-07,
|
196 |
+
"logits/chosen": -3.026671886444092,
|
197 |
+
"logits/rejected": -3.028925657272339,
|
198 |
+
"logps/chosen": -268.51568603515625,
|
199 |
+
"logps/rejected": -220.26260375976562,
|
200 |
+
"loss": 0.6796,
|
201 |
+
"rewards/accuracies": 0.6812499761581421,
|
202 |
+
"rewards/chosen": 0.01651640608906746,
|
203 |
+
"rewards/margins": 0.03892368823289871,
|
204 |
+
"rewards/rejected": -0.022407282143831253,
|
205 |
"step": 130
|
206 |
},
|
207 |
{
|
208 |
"epoch": 0.29,
|
209 |
"learning_rate": 3.9564220183486236e-07,
|
210 |
+
"logits/chosen": -3.0486905574798584,
|
211 |
+
"logits/rejected": -3.038287878036499,
|
212 |
+
"logps/chosen": -287.53082275390625,
|
213 |
+
"logps/rejected": -222.7250213623047,
|
214 |
+
"loss": 0.6761,
|
215 |
+
"rewards/accuracies": 0.625,
|
216 |
+
"rewards/chosen": 0.015277748927474022,
|
217 |
+
"rewards/margins": 0.03703855723142624,
|
218 |
+
"rewards/rejected": -0.021760808303952217,
|
219 |
"step": 140
|
220 |
},
|
221 |
{
|
222 |
"epoch": 0.31,
|
223 |
"learning_rate": 3.841743119266055e-07,
|
224 |
+
"logits/chosen": -3.0499072074890137,
|
225 |
+
"logits/rejected": -2.9781885147094727,
|
226 |
+
"logps/chosen": -259.89739990234375,
|
227 |
+
"logps/rejected": -237.8246307373047,
|
228 |
+
"loss": 0.6766,
|
229 |
+
"rewards/accuracies": 0.606249988079071,
|
230 |
+
"rewards/chosen": 0.008082658052444458,
|
231 |
+
"rewards/margins": 0.022740600630640984,
|
232 |
+
"rewards/rejected": -0.014657942578196526,
|
233 |
"step": 150
|
234 |
},
|
235 |
{
|
236 |
"epoch": 0.33,
|
237 |
"learning_rate": 3.7270642201834864e-07,
|
238 |
+
"logits/chosen": -3.0514883995056152,
|
239 |
+
"logits/rejected": -3.022423505783081,
|
240 |
+
"logps/chosen": -309.00494384765625,
|
241 |
+
"logps/rejected": -228.82583618164062,
|
242 |
+
"loss": 0.6761,
|
243 |
+
"rewards/accuracies": 0.6812499761581421,
|
244 |
+
"rewards/chosen": 0.011194340884685516,
|
245 |
+
"rewards/margins": 0.04575566574931145,
|
246 |
+
"rewards/rejected": -0.03456132486462593,
|
247 |
"step": 160
|
248 |
},
|
249 |
{
|
250 |
"epoch": 0.35,
|
251 |
"learning_rate": 3.612385321100918e-07,
|
252 |
+
"logits/chosen": -3.0084691047668457,
|
253 |
+
"logits/rejected": -3.011247158050537,
|
254 |
+
"logps/chosen": -300.25762939453125,
|
255 |
+
"logps/rejected": -233.0517120361328,
|
256 |
+
"loss": 0.6723,
|
257 |
+
"rewards/accuracies": 0.65625,
|
258 |
+
"rewards/chosen": 0.017441127449274063,
|
259 |
+
"rewards/margins": 0.04596921056509018,
|
260 |
+
"rewards/rejected": -0.028528084978461266,
|
261 |
"step": 170
|
262 |
},
|
263 |
{
|
264 |
"epoch": 0.37,
|
265 |
"learning_rate": 3.497706422018348e-07,
|
266 |
+
"logits/chosen": -2.998293161392212,
|
267 |
+
"logits/rejected": -3.03139591217041,
|
268 |
+
"logps/chosen": -299.2662658691406,
|
269 |
+
"logps/rejected": -239.52804565429688,
|
270 |
+
"loss": 0.6677,
|
271 |
+
"rewards/accuracies": 0.643750011920929,
|
272 |
+
"rewards/chosen": 0.02176077291369438,
|
273 |
+
"rewards/margins": 0.04461668059229851,
|
274 |
+
"rewards/rejected": -0.022855903953313828,
|
275 |
"step": 180
|
276 |
},
|
277 |
{
|
278 |
"epoch": 0.39,
|
279 |
"learning_rate": 3.3830275229357795e-07,
|
280 |
+
"logits/chosen": -3.0845465660095215,
|
281 |
+
"logits/rejected": -3.0398240089416504,
|
282 |
+
"logps/chosen": -292.7340087890625,
|
283 |
+
"logps/rejected": -236.06533813476562,
|
284 |
+
"loss": 0.671,
|
285 |
+
"rewards/accuracies": 0.7124999761581421,
|
286 |
+
"rewards/chosen": 0.026021122932434082,
|
287 |
+
"rewards/margins": 0.05270993709564209,
|
288 |
+
"rewards/rejected": -0.02668880857527256,
|
289 |
"step": 190
|
290 |
},
|
291 |
{
|
292 |
"epoch": 0.41,
|
293 |
"learning_rate": 3.268348623853211e-07,
|
294 |
+
"logits/chosen": -3.0351808071136475,
|
295 |
+
"logits/rejected": -3.048321485519409,
|
296 |
+
"logps/chosen": -269.8604736328125,
|
297 |
+
"logps/rejected": -221.87197875976562,
|
298 |
+
"loss": 0.6699,
|
299 |
+
"rewards/accuracies": 0.65625,
|
300 |
+
"rewards/chosen": 0.023518767207860947,
|
301 |
+
"rewards/margins": 0.06251207739114761,
|
302 |
+
"rewards/rejected": -0.03899329900741577,
|
303 |
"step": 200
|
304 |
},
|
305 |
{
|
306 |
"epoch": 0.43,
|
307 |
"learning_rate": 3.1536697247706423e-07,
|
308 |
+
"logits/chosen": -3.0768158435821533,
|
309 |
+
"logits/rejected": -3.083721160888672,
|
310 |
+
"logps/chosen": -282.7914733886719,
|
311 |
+
"logps/rejected": -258.88677978515625,
|
312 |
+
"loss": 0.6694,
|
313 |
+
"rewards/accuracies": 0.65625,
|
314 |
+
"rewards/chosen": 0.017539020627737045,
|
315 |
+
"rewards/margins": 0.06356575340032578,
|
316 |
+
"rewards/rejected": -0.04602673649787903,
|
317 |
"step": 210
|
318 |
},
|
319 |
{
|
320 |
"epoch": 0.45,
|
321 |
"learning_rate": 3.038990825688073e-07,
|
322 |
+
"logits/chosen": -3.031602621078491,
|
323 |
+
"logits/rejected": -3.0251471996307373,
|
324 |
+
"logps/chosen": -291.6885681152344,
|
325 |
+
"logps/rejected": -229.2044219970703,
|
326 |
+
"loss": 0.6661,
|
327 |
+
"rewards/accuracies": 0.6875,
|
328 |
+
"rewards/chosen": 0.019487539306282997,
|
329 |
+
"rewards/margins": 0.06684577465057373,
|
330 |
+
"rewards/rejected": -0.047358229756355286,
|
331 |
"step": 220
|
332 |
},
|
333 |
{
|
334 |
"epoch": 0.47,
|
335 |
"learning_rate": 2.9243119266055045e-07,
|
336 |
+
"logits/chosen": -3.0594446659088135,
|
337 |
+
"logits/rejected": -3.0538389682769775,
|
338 |
+
"logps/chosen": -296.71978759765625,
|
339 |
+
"logps/rejected": -232.9663543701172,
|
340 |
+
"loss": 0.6672,
|
341 |
+
"rewards/accuracies": 0.6625000238418579,
|
342 |
+
"rewards/chosen": 0.012631967663764954,
|
343 |
+
"rewards/margins": 0.05379491299390793,
|
344 |
+
"rewards/rejected": -0.04116294905543327,
|
345 |
"step": 230
|
346 |
},
|
347 |
{
|
348 |
"epoch": 0.49,
|
349 |
"learning_rate": 2.809633027522936e-07,
|
350 |
+
"logits/chosen": -3.009617805480957,
|
351 |
+
"logits/rejected": -3.0026957988739014,
|
352 |
+
"logps/chosen": -244.1639862060547,
|
353 |
+
"logps/rejected": -207.7158203125,
|
354 |
+
"loss": 0.6633,
|
355 |
+
"rewards/accuracies": 0.675000011920929,
|
356 |
+
"rewards/chosen": 0.01613594964146614,
|
357 |
+
"rewards/margins": 0.0652671605348587,
|
358 |
+
"rewards/rejected": -0.049131207168102264,
|
359 |
"step": 240
|
360 |
},
|
361 |
{
|
362 |
"epoch": 0.52,
|
363 |
"learning_rate": 2.6949541284403673e-07,
|
364 |
+
"logits/chosen": -3.0107674598693848,
|
365 |
+
"logits/rejected": -3.012376308441162,
|
366 |
+
"logps/chosen": -287.5134582519531,
|
367 |
+
"logps/rejected": -248.5124053955078,
|
368 |
+
"loss": 0.6593,
|
369 |
+
"rewards/accuracies": 0.6625000238418579,
|
370 |
+
"rewards/chosen": 0.017785798758268356,
|
371 |
+
"rewards/margins": 0.0654246062040329,
|
372 |
+
"rewards/rejected": -0.04763881862163544,
|
373 |
"step": 250
|
374 |
},
|
375 |
{
|
376 |
"epoch": 0.54,
|
377 |
"learning_rate": 2.5802752293577976e-07,
|
378 |
+
"logits/chosen": -3.014228105545044,
|
379 |
+
"logits/rejected": -2.980214834213257,
|
380 |
+
"logps/chosen": -297.2572021484375,
|
381 |
+
"logps/rejected": -246.603515625,
|
382 |
+
"loss": 0.6592,
|
383 |
+
"rewards/accuracies": 0.699999988079071,
|
384 |
+
"rewards/chosen": 0.016676222905516624,
|
385 |
+
"rewards/margins": 0.07184126228094101,
|
386 |
+
"rewards/rejected": -0.055165041238069534,
|
387 |
"step": 260
|
388 |
},
|
389 |
{
|
390 |
"epoch": 0.56,
|
391 |
"learning_rate": 2.465596330275229e-07,
|
392 |
+
"logits/chosen": -3.016359329223633,
|
393 |
+
"logits/rejected": -3.0183348655700684,
|
394 |
+
"logps/chosen": -261.7985534667969,
|
395 |
+
"logps/rejected": -230.5518341064453,
|
396 |
+
"loss": 0.6631,
|
397 |
+
"rewards/accuracies": 0.6499999761581421,
|
398 |
+
"rewards/chosen": 0.03012824058532715,
|
399 |
+
"rewards/margins": 0.07465063035488129,
|
400 |
+
"rewards/rejected": -0.04452239349484444,
|
401 |
"step": 270
|
402 |
},
|
403 |
{
|
404 |
"epoch": 0.58,
|
405 |
"learning_rate": 2.3509174311926604e-07,
|
406 |
+
"logits/chosen": -3.062016487121582,
|
407 |
+
"logits/rejected": -3.08595871925354,
|
408 |
+
"logps/chosen": -271.8788757324219,
|
409 |
+
"logps/rejected": -242.447509765625,
|
410 |
+
"loss": 0.6615,
|
411 |
+
"rewards/accuracies": 0.675000011920929,
|
412 |
+
"rewards/chosen": 0.03031134605407715,
|
413 |
+
"rewards/margins": 0.06919924914836884,
|
414 |
+
"rewards/rejected": -0.03888789564371109,
|
415 |
"step": 280
|
416 |
},
|
417 |
{
|
418 |
"epoch": 0.6,
|
419 |
"learning_rate": 2.2362385321100916e-07,
|
420 |
+
"logits/chosen": -3.065378427505493,
|
421 |
+
"logits/rejected": -3.067957639694214,
|
422 |
+
"logps/chosen": -293.88592529296875,
|
423 |
+
"logps/rejected": -247.717529296875,
|
424 |
+
"loss": 0.6595,
|
425 |
+
"rewards/accuracies": 0.6000000238418579,
|
426 |
+
"rewards/chosen": 0.018432429060339928,
|
427 |
+
"rewards/margins": 0.06363337486982346,
|
428 |
+
"rewards/rejected": -0.04520093649625778,
|
429 |
"step": 290
|
430 |
},
|
431 |
{
|
432 |
"epoch": 0.62,
|
433 |
"learning_rate": 2.121559633027523e-07,
|
434 |
+
"logits/chosen": -3.0399653911590576,
|
435 |
+
"logits/rejected": -3.050255298614502,
|
436 |
+
"logps/chosen": -248.15798950195312,
|
437 |
+
"logps/rejected": -231.6765594482422,
|
438 |
+
"loss": 0.6583,
|
439 |
+
"rewards/accuracies": 0.7124999761581421,
|
440 |
+
"rewards/chosen": 0.011154914274811745,
|
441 |
+
"rewards/margins": 0.07162971049547195,
|
442 |
+
"rewards/rejected": -0.060474805533885956,
|
443 |
"step": 300
|
444 |
},
|
445 |
{
|
446 |
"epoch": 0.64,
|
447 |
"learning_rate": 2.0068807339449538e-07,
|
448 |
+
"logits/chosen": -3.0448567867279053,
|
449 |
+
"logits/rejected": -3.0284125804901123,
|
450 |
+
"logps/chosen": -259.03173828125,
|
451 |
+
"logps/rejected": -213.7626190185547,
|
452 |
+
"loss": 0.6551,
|
453 |
+
"rewards/accuracies": 0.706250011920929,
|
454 |
+
"rewards/chosen": 0.03144986182451248,
|
455 |
+
"rewards/margins": 0.09296337515115738,
|
456 |
+
"rewards/rejected": -0.061513520777225494,
|
457 |
"step": 310
|
458 |
},
|
459 |
{
|
460 |
"epoch": 0.66,
|
461 |
"learning_rate": 1.8922018348623852e-07,
|
462 |
+
"logits/chosen": -3.069314479827881,
|
463 |
+
"logits/rejected": -3.0522797107696533,
|
464 |
+
"logps/chosen": -247.6428680419922,
|
465 |
+
"logps/rejected": -224.86416625976562,
|
466 |
+
"loss": 0.6537,
|
467 |
+
"rewards/accuracies": 0.6875,
|
468 |
+
"rewards/chosen": 0.03007657267153263,
|
469 |
+
"rewards/margins": 0.09640363603830338,
|
470 |
+
"rewards/rejected": -0.0663270577788353,
|
471 |
"step": 320
|
472 |
},
|
473 |
{
|
474 |
"epoch": 0.68,
|
475 |
"learning_rate": 1.7775229357798163e-07,
|
476 |
+
"logits/chosen": -3.0534980297088623,
|
477 |
+
"logits/rejected": -3.0750725269317627,
|
478 |
+
"logps/chosen": -292.9278564453125,
|
479 |
+
"logps/rejected": -239.49560546875,
|
480 |
+
"loss": 0.654,
|
481 |
+
"rewards/accuracies": 0.6812499761581421,
|
482 |
+
"rewards/chosen": 0.03384874761104584,
|
483 |
+
"rewards/margins": 0.09248127043247223,
|
484 |
+
"rewards/rejected": -0.05863253027200699,
|
485 |
"step": 330
|
486 |
},
|
487 |
{
|
488 |
"epoch": 0.7,
|
489 |
"learning_rate": 1.6628440366972477e-07,
|
490 |
+
"logits/chosen": -3.0116381645202637,
|
491 |
+
"logits/rejected": -3.012748956680298,
|
492 |
+
"logps/chosen": -310.517822265625,
|
493 |
+
"logps/rejected": -256.17578125,
|
494 |
+
"loss": 0.6583,
|
495 |
+
"rewards/accuracies": 0.65625,
|
496 |
+
"rewards/chosen": 0.023617586120963097,
|
497 |
+
"rewards/margins": 0.07958104461431503,
|
498 |
+
"rewards/rejected": -0.055963464081287384,
|
499 |
"step": 340
|
500 |
},
|
501 |
{
|
502 |
"epoch": 0.72,
|
503 |
"learning_rate": 1.5481651376146786e-07,
|
504 |
+
"logits/chosen": -3.015288829803467,
|
505 |
+
"logits/rejected": -3.035534381866455,
|
506 |
+
"logps/chosen": -238.67788696289062,
|
507 |
+
"logps/rejected": -216.6863250732422,
|
508 |
+
"loss": 0.6575,
|
509 |
+
"rewards/accuracies": 0.6812499761581421,
|
510 |
+
"rewards/chosen": 0.016075262799859047,
|
511 |
+
"rewards/margins": 0.07468070089817047,
|
512 |
+
"rewards/rejected": -0.05860542505979538,
|
513 |
"step": 350
|
514 |
},
|
515 |
{
|
516 |
"epoch": 0.74,
|
517 |
"learning_rate": 1.43348623853211e-07,
|
518 |
+
"logits/chosen": -2.999647855758667,
|
519 |
+
"logits/rejected": -2.999812602996826,
|
520 |
+
"logps/chosen": -260.34814453125,
|
521 |
+
"logps/rejected": -228.0465545654297,
|
522 |
+
"loss": 0.6576,
|
523 |
+
"rewards/accuracies": 0.6937500238418579,
|
524 |
+
"rewards/chosen": 0.010297578759491444,
|
525 |
+
"rewards/margins": 0.07942849397659302,
|
526 |
+
"rewards/rejected": -0.06913091242313385,
|
527 |
"step": 360
|
528 |
},
|
529 |
{
|
530 |
"epoch": 0.76,
|
531 |
"learning_rate": 1.318807339449541e-07,
|
532 |
+
"logits/chosen": -3.029534101486206,
|
533 |
+
"logits/rejected": -3.0314173698425293,
|
534 |
+
"logps/chosen": -284.08721923828125,
|
535 |
+
"logps/rejected": -248.7538604736328,
|
536 |
+
"loss": 0.6532,
|
537 |
+
"rewards/accuracies": 0.7250000238418579,
|
538 |
+
"rewards/chosen": 0.036643363535404205,
|
539 |
+
"rewards/margins": 0.10196901857852936,
|
540 |
+
"rewards/rejected": -0.06532564014196396,
|
541 |
"step": 370
|
542 |
},
|
543 |
{
|
544 |
"epoch": 0.78,
|
545 |
"learning_rate": 1.2041284403669725e-07,
|
546 |
+
"logits/chosen": -3.0199809074401855,
|
547 |
+
"logits/rejected": -3.012413501739502,
|
548 |
+
"logps/chosen": -237.28573608398438,
|
549 |
+
"logps/rejected": -243.95590209960938,
|
550 |
+
"loss": 0.6538,
|
551 |
+
"rewards/accuracies": 0.625,
|
552 |
+
"rewards/chosen": 0.012126882560551167,
|
553 |
+
"rewards/margins": 0.08107715100049973,
|
554 |
+
"rewards/rejected": -0.06895027309656143,
|
555 |
"step": 380
|
556 |
},
|
557 |
{
|
558 |
"epoch": 0.8,
|
559 |
"learning_rate": 1.0894495412844036e-07,
|
560 |
+
"logits/chosen": -2.9979848861694336,
|
561 |
+
"logits/rejected": -3.015382766723633,
|
562 |
+
"logps/chosen": -312.633544921875,
|
563 |
+
"logps/rejected": -235.29806518554688,
|
564 |
+
"loss": 0.6503,
|
565 |
+
"rewards/accuracies": 0.6875,
|
566 |
+
"rewards/chosen": 0.013257297687232494,
|
567 |
+
"rewards/margins": 0.09175875037908554,
|
568 |
+
"rewards/rejected": -0.07850147038698196,
|
569 |
"step": 390
|
570 |
},
|
571 |
{
|
572 |
"epoch": 0.82,
|
573 |
"learning_rate": 9.747706422018348e-08,
|
574 |
+
"logits/chosen": -3.048086166381836,
|
575 |
+
"logits/rejected": -3.0504872798919678,
|
576 |
+
"logps/chosen": -278.25189208984375,
|
577 |
+
"logps/rejected": -248.26510620117188,
|
578 |
+
"loss": 0.6511,
|
579 |
+
"rewards/accuracies": 0.6625000238418579,
|
580 |
+
"rewards/chosen": 0.01932488940656185,
|
581 |
+
"rewards/margins": 0.07586248964071274,
|
582 |
+
"rewards/rejected": -0.05653759837150574,
|
583 |
"step": 400
|
584 |
},
|
585 |
{
|
586 |
"epoch": 0.85,
|
587 |
"learning_rate": 8.60091743119266e-08,
|
588 |
+
"logits/chosen": -3.0442748069763184,
|
589 |
+
"logits/rejected": -3.0469086170196533,
|
590 |
+
"logps/chosen": -291.3175048828125,
|
591 |
+
"logps/rejected": -228.79153442382812,
|
592 |
+
"loss": 0.6492,
|
593 |
+
"rewards/accuracies": 0.768750011920929,
|
594 |
+
"rewards/chosen": 0.026980062946677208,
|
595 |
+
"rewards/margins": 0.10874257236719131,
|
596 |
+
"rewards/rejected": -0.08176250755786896,
|
597 |
"step": 410
|
598 |
},
|
599 |
{
|
600 |
"epoch": 0.87,
|
601 |
"learning_rate": 7.454128440366971e-08,
|
602 |
+
"logits/chosen": -3.0467171669006348,
|
603 |
+
"logits/rejected": -3.0507471561431885,
|
604 |
+
"logps/chosen": -274.8234558105469,
|
605 |
+
"logps/rejected": -227.38638305664062,
|
606 |
+
"loss": 0.6509,
|
607 |
+
"rewards/accuracies": 0.6812499761581421,
|
608 |
+
"rewards/chosen": 0.021679330617189407,
|
609 |
+
"rewards/margins": 0.10154237598180771,
|
610 |
+
"rewards/rejected": -0.0798630565404892,
|
611 |
"step": 420
|
612 |
},
|
613 |
{
|
614 |
"epoch": 0.89,
|
615 |
"learning_rate": 6.307339449541284e-08,
|
616 |
+
"logits/chosen": -3.0327906608581543,
|
617 |
+
"logits/rejected": -3.0403802394866943,
|
618 |
+
"logps/chosen": -256.6832580566406,
|
619 |
+
"logps/rejected": -259.84295654296875,
|
620 |
+
"loss": 0.651,
|
621 |
+
"rewards/accuracies": 0.6937500238418579,
|
622 |
+
"rewards/chosen": 0.014505205675959587,
|
623 |
+
"rewards/margins": 0.08562619239091873,
|
624 |
+
"rewards/rejected": -0.07112099230289459,
|
625 |
"step": 430
|
626 |
},
|
627 |
{
|
628 |
"epoch": 0.91,
|
629 |
"learning_rate": 5.1605504587155966e-08,
|
630 |
+
"logits/chosen": -3.0034422874450684,
|
631 |
+
"logits/rejected": -3.013791799545288,
|
632 |
+
"logps/chosen": -301.6542053222656,
|
633 |
+
"logps/rejected": -234.38381958007812,
|
634 |
+
"loss": 0.6493,
|
635 |
+
"rewards/accuracies": 0.699999988079071,
|
636 |
+
"rewards/chosen": 0.01893182098865509,
|
637 |
+
"rewards/margins": 0.12277624756097794,
|
638 |
+
"rewards/rejected": -0.10384440422058105,
|
639 |
"step": 440
|
640 |
},
|
641 |
{
|
642 |
"epoch": 0.93,
|
643 |
"learning_rate": 4.0137614678899086e-08,
|
644 |
+
"logits/chosen": -3.0094974040985107,
|
645 |
+
"logits/rejected": -2.9743194580078125,
|
646 |
+
"logps/chosen": -290.7796630859375,
|
647 |
+
"logps/rejected": -238.8968963623047,
|
648 |
+
"loss": 0.6521,
|
649 |
+
"rewards/accuracies": 0.699999988079071,
|
650 |
+
"rewards/chosen": 0.029885241761803627,
|
651 |
+
"rewards/margins": 0.11550422757863998,
|
652 |
+
"rewards/rejected": -0.08561898022890091,
|
653 |
"step": 450
|
654 |
},
|
655 |
{
|
656 |
"epoch": 0.95,
|
657 |
"learning_rate": 2.86697247706422e-08,
|
658 |
+
"logits/chosen": -3.043740749359131,
|
659 |
+
"logits/rejected": -3.035067081451416,
|
660 |
+
"logps/chosen": -258.144287109375,
|
661 |
+
"logps/rejected": -234.55081176757812,
|
662 |
+
"loss": 0.6483,
|
663 |
+
"rewards/accuracies": 0.6812499761581421,
|
664 |
+
"rewards/chosen": 0.01103687472641468,
|
665 |
+
"rewards/margins": 0.0873931497335434,
|
666 |
+
"rewards/rejected": -0.07635627686977386,
|
667 |
"step": 460
|
668 |
},
|
669 |
{
|
670 |
"epoch": 0.97,
|
671 |
"learning_rate": 1.720183486238532e-08,
|
672 |
+
"logits/chosen": -3.0199711322784424,
|
673 |
+
"logits/rejected": -3.026895761489868,
|
674 |
+
"logps/chosen": -288.91119384765625,
|
675 |
+
"logps/rejected": -242.3592071533203,
|
676 |
+
"loss": 0.6511,
|
677 |
+
"rewards/accuracies": 0.6812499761581421,
|
678 |
+
"rewards/chosen": 0.03277132660150528,
|
679 |
+
"rewards/margins": 0.10831280797719955,
|
680 |
+
"rewards/rejected": -0.07554147392511368,
|
681 |
"step": 470
|
682 |
},
|
683 |
{
|
684 |
"epoch": 0.99,
|
685 |
"learning_rate": 5.73394495412844e-09,
|
686 |
+
"logits/chosen": -3.041350841522217,
|
687 |
+
"logits/rejected": -3.058216094970703,
|
688 |
+
"logps/chosen": -258.9853820800781,
|
689 |
+
"logps/rejected": -226.7718048095703,
|
690 |
+
"loss": 0.6488,
|
691 |
+
"rewards/accuracies": 0.6875,
|
692 |
+
"rewards/chosen": 0.022982869297266006,
|
693 |
+
"rewards/margins": 0.09263849258422852,
|
694 |
+
"rewards/rejected": -0.06965561956167221,
|
695 |
"step": 480
|
696 |
},
|
697 |
{
|
698 |
"epoch": 1.0,
|
699 |
+
"eval_logits/chosen": -3.07612943649292,
|
700 |
+
"eval_logits/rejected": -3.056239604949951,
|
701 |
+
"eval_logps/chosen": -271.6427917480469,
|
702 |
+
"eval_logps/rejected": -224.8079376220703,
|
703 |
+
"eval_loss": 0.6488261818885803,
|
704 |
+
"eval_rewards/accuracies": 0.7109375,
|
705 |
+
"eval_rewards/chosen": 0.034067459404468536,
|
706 |
+
"eval_rewards/margins": 0.11610361933708191,
|
707 |
+
"eval_rewards/rejected": -0.08203616738319397,
|
708 |
+
"eval_runtime": 255.1726,
|
709 |
+
"eval_samples_per_second": 7.838,
|
710 |
+
"eval_steps_per_second": 0.063,
|
711 |
"step": 485
|
712 |
},
|
713 |
{
|
714 |
"epoch": 1.0,
|
715 |
"step": 485,
|
716 |
"total_flos": 0.0,
|
717 |
+
"train_loss": 0.6667533972828659,
|
718 |
+
"train_runtime": 15505.6746,
|
719 |
+
"train_samples_per_second": 3.996,
|
720 |
"train_steps_per_second": 0.031
|
721 |
}
|
722 |
],
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5688
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0e4c37c4ba74a7c42b124e93bdaa61e543cd3533851bf4e87301d3ef2e466cd
|
3 |
size 5688
|