Model save
Browse files- README.md +27 -26
- all_results.json +18 -4
- eval_results.json +17 -0
- runs/Sep20_04-14-32_xe8545-a100-29/events.out.tfevents.1726805549.xe8545-a100-29.124074.1 +3 -0
- runs/Sep20_23-12-02_xe8545-a100-31/events.out.tfevents.1726867169.xe8545-a100-31.109672.0 +3 -0
- train_results.json +4 -4
- trainer_state.json +4 -4
- training_args.bin +1 -1
README.md
CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
|
|
3 |
tags:
|
4 |
- trl
|
5 |
- cpo
|
|
|
6 |
- generated_from_trainer
|
7 |
model-index:
|
8 |
- name: OpenELM-1_1B-CPO
|
@@ -16,16 +17,16 @@ should probably proofread and complete it, then remove this comment. -->
|
|
16 |
|
17 |
This model was trained from scratch on an unknown dataset.
|
18 |
It achieves the following results on the evaluation set:
|
|
|
|
|
|
|
|
|
19 |
- Loss: 2.1904
|
20 |
-
-
|
21 |
-
- Rewards/rejected: -4.4375
|
22 |
- Rewards/accuracies: 0.5918
|
|
|
23 |
- Rewards/margins: 0.8008
|
24 |
-
-
|
25 |
-
- Logps/chosen: -364.0
|
26 |
-
- Logits/rejected: -7.5312
|
27 |
-
- Logits/chosen: -8.875
|
28 |
-
- Nll Loss: 1.1719
|
29 |
|
30 |
## Model description
|
31 |
|
@@ -71,25 +72,25 @@ The following hyperparameters were used during training:
|
|
71 |
| 2.2554 | 0.7326 | 700 | -9.6875 | -8.5625 | -374.0 | -416.0 | 2.1848 | 1.2031 | 0.5664 | -3.7344 | 0.4258 | -4.1562 |
|
72 |
| 2.0796 | 0.8373 | 800 | -7.8438 | -7.0312 | -346.0 | -374.0 | 2.1224 | 1.1172 | 0.5469 | -3.4531 | 0.2852 | -3.75 |
|
73 |
| 2.1021 | 0.9419 | 900 | -6.2812 | -5.2812 | -350.0 | -390.0 | 2.1099 | 1.1328 | 0.5723 | -3.5 | 0.4062 | -3.9062 |
|
74 |
-
| 1.5182 | 1.0471 | 1000 |
|
75 |
-
| 1.4917 | 1.1518 | 1100 |
|
76 |
-
| 1.5219 | 1.2564 | 1200 |
|
77 |
-
| 1.5292 | 1.3611 | 1300 |
|
78 |
-
| 1.4257 | 1.4657 | 1400 |
|
79 |
-
| 1.4366 | 1.5704 | 1500 |
|
80 |
-
| 1.5246 | 1.6750 | 1600 |
|
81 |
-
| 1.4534 | 1.7797 | 1700 |
|
82 |
-
| 1.4551 | 1.8844 | 1800 |
|
83 |
-
| 1.4969 | 1.9890 | 1900 |
|
84 |
-
| 0.9984 | 2.0937 | 2000 |
|
85 |
-
| 0.9885 | 2.1983 | 2100 |
|
86 |
-
| 0.9814 | 2.3030 | 2200 |
|
87 |
-
| 0.9844 | 2.4076 | 2300 |
|
88 |
-
| 0.9931 | 2.5123 | 2400 |
|
89 |
-
| 0.9537 | 2.6170 | 2500 |
|
90 |
-
| 0.9512 | 2.7216 | 2600 |
|
91 |
-
| 0.9604 | 2.8263 | 2700 |
|
92 |
-
| 1.0208 | 2.9309 | 2800 |
|
93 |
|
94 |
|
95 |
### Framework versions
|
|
|
3 |
tags:
|
4 |
- trl
|
5 |
- cpo
|
6 |
+
- alignment-handbook
|
7 |
- generated_from_trainer
|
8 |
model-index:
|
9 |
- name: OpenELM-1_1B-CPO
|
|
|
17 |
|
18 |
This model was trained from scratch on an unknown dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
- Logits/chosen: -8.875
|
21 |
+
- Logits/rejected: -7.5312
|
22 |
+
- Logps/chosen: -364.0
|
23 |
+
- Logps/rejected: -444.0
|
24 |
- Loss: 2.1904
|
25 |
+
- Nll Loss: 1.1719
|
|
|
26 |
- Rewards/accuracies: 0.5918
|
27 |
+
- Rewards/chosen: -3.6406
|
28 |
- Rewards/margins: 0.8008
|
29 |
+
- Rewards/rejected: -4.4375
|
|
|
|
|
|
|
|
|
30 |
|
31 |
## Model description
|
32 |
|
|
|
72 |
| 2.2554 | 0.7326 | 700 | -9.6875 | -8.5625 | -374.0 | -416.0 | 2.1848 | 1.2031 | 0.5664 | -3.7344 | 0.4258 | -4.1562 |
|
73 |
| 2.0796 | 0.8373 | 800 | -7.8438 | -7.0312 | -346.0 | -374.0 | 2.1224 | 1.1172 | 0.5469 | -3.4531 | 0.2852 | -3.75 |
|
74 |
| 2.1021 | 0.9419 | 900 | -6.2812 | -5.2812 | -350.0 | -390.0 | 2.1099 | 1.1328 | 0.5723 | -3.5 | 0.4062 | -3.9062 |
|
75 |
+
| 1.5182 | 1.0471 | 1000 | -10.625 | -9.375 | -350.0 | -386.0 | 2.1662 | 1.125 | 0.5664 | -3.5 | 0.3633 | -3.8594 |
|
76 |
+
| 1.4917 | 1.1518 | 1100 | -7.875 | -6.4688 | -356.0 | -400.0 | 2.1588 | 1.1484 | 0.5703 | -3.5625 | 0.4395 | -4.0 |
|
77 |
+
| 1.5219 | 1.2564 | 1200 | -7.7812 | -6.6562 | -364.0 | -420.0 | 2.1449 | 1.1719 | 0.5938 | -3.625 | 0.5586 | -4.1875 |
|
78 |
+
| 1.5292 | 1.3611 | 1300 | -8.875 | -7.75 | -354.0 | -402.0 | 2.1489 | 1.1406 | 0.5742 | -3.5312 | 0.4785 | -4.0 |
|
79 |
+
| 1.4257 | 1.4657 | 1400 | -9.25 | -7.7188 | -358.0 | -410.0 | 2.1193 | 1.1562 | 0.5801 | -3.5781 | 0.5156 | -4.0938 |
|
80 |
+
| 1.4366 | 1.5704 | 1500 | -8.9375 | -7.6875 | -358.0 | -416.0 | 2.0983 | 1.1562 | 0.5898 | -3.5938 | 0.5586 | -4.1562 |
|
81 |
+
| 1.5246 | 1.6750 | 1600 | -6.9062 | -5.4688 | -358.0 | -420.0 | 2.1191 | 1.1562 | 0.5938 | -3.5781 | 0.625 | -4.2188 |
|
82 |
+
| 1.4534 | 1.7797 | 1700 | -10.0625 | -9.0625 | -348.0 | -404.0 | 2.0829 | 1.1172 | 0.5762 | -3.4688 | 0.5625 | -4.0312 |
|
83 |
+
| 1.4551 | 1.8844 | 1800 | -8.1875 | -6.8438 | -356.0 | -416.0 | 2.1033 | 1.1484 | 0.5898 | -3.5625 | 0.6016 | -4.1562 |
|
84 |
+
| 1.4969 | 1.9890 | 1900 | -9.3125 | -8.125 | -354.0 | -412.0 | 2.1046 | 1.1406 | 0.5762 | -3.5312 | 0.5938 | -4.125 |
|
85 |
+
| 0.9984 | 2.0937 | 2000 | -9.1875 | -7.9375 | -364.0 | -428.0 | 2.1806 | 1.1719 | 0.5781 | -3.6406 | 0.6367 | -4.2812 |
|
86 |
+
| 0.9885 | 2.1983 | 2100 | -8.6875 | -7.4062 | -370.0 | -448.0 | 2.1927 | 1.1875 | 0.5801 | -3.6875 | 0.7930 | -4.5 |
|
87 |
+
| 0.9814 | 2.3030 | 2200 | -8.8125 | -7.5 | -362.0 | -436.0 | 2.1867 | 1.1719 | 0.5742 | -3.625 | 0.7266 | -4.3438 |
|
88 |
+
| 0.9844 | 2.4076 | 2300 | -8.375 | -7.125 | -368.0 | -452.0 | 2.1905 | 1.1875 | 0.5996 | -3.6875 | 0.8438 | -4.5312 |
|
89 |
+
| 0.9931 | 2.5123 | 2400 | -8.6875 | -7.375 | -364.0 | -442.0 | 2.1843 | 1.1719 | 0.5820 | -3.6406 | 0.7930 | -4.4375 |
|
90 |
+
| 0.9537 | 2.6170 | 2500 | -8.8125 | -7.5 | -364.0 | -446.0 | 2.1907 | 1.1719 | 0.5898 | -3.6406 | 0.8125 | -4.4688 |
|
91 |
+
| 0.9512 | 2.7216 | 2600 | -8.8125 | -7.5 | -364.0 | -446.0 | 2.1918 | 1.1719 | 0.5898 | -3.6406 | 0.8086 | -4.4375 |
|
92 |
+
| 0.9604 | 2.8263 | 2700 | -8.875 | -7.5312 | -364.0 | -442.0 | 2.1906 | 1.1719 | 0.5879 | -3.6406 | 0.7969 | -4.4375 |
|
93 |
+
| 1.0208 | 2.9309 | 2800 | -8.875 | -7.5312 | -364.0 | -444.0 | 2.1904 | 1.1719 | 0.5918 | -3.6406 | 0.8008 | -4.4375 |
|
94 |
|
95 |
|
96 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,9 +1,23 @@
|
|
1 |
{
|
2 |
"epoch": 2.998953427524856,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 61134,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second":
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.998953427524856,
|
3 |
+
"eval_logits/chosen": -8.875,
|
4 |
+
"eval_logits/rejected": -7.5625,
|
5 |
+
"eval_logps/chosen": -364.0,
|
6 |
+
"eval_logps/rejected": -444.0,
|
7 |
+
"eval_loss": 2.1908750534057617,
|
8 |
+
"eval_nll_loss": 1.171875,
|
9 |
+
"eval_rewards/accuracies": 0.59375,
|
10 |
+
"eval_rewards/chosen": -3.640625,
|
11 |
+
"eval_rewards/margins": 0.80078125,
|
12 |
+
"eval_rewards/rejected": -4.4375,
|
13 |
+
"eval_runtime": 23.576,
|
14 |
+
"eval_samples": 2000,
|
15 |
+
"eval_samples_per_second": 84.832,
|
16 |
+
"eval_steps_per_second": 1.357,
|
17 |
"total_flos": 0.0,
|
18 |
+
"train_loss": 0.0,
|
19 |
+
"train_runtime": 0.0505,
|
20 |
"train_samples": 61134,
|
21 |
+
"train_samples_per_second": 3630511.85,
|
22 |
+
"train_steps_per_second": 56713.757
|
23 |
}
|
eval_results.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.998953427524856,
|
3 |
+
"eval_logits/chosen": -8.875,
|
4 |
+
"eval_logits/rejected": -7.5625,
|
5 |
+
"eval_logps/chosen": -364.0,
|
6 |
+
"eval_logps/rejected": -444.0,
|
7 |
+
"eval_loss": 2.1908750534057617,
|
8 |
+
"eval_nll_loss": 1.171875,
|
9 |
+
"eval_rewards/accuracies": 0.59375,
|
10 |
+
"eval_rewards/chosen": -3.640625,
|
11 |
+
"eval_rewards/margins": 0.80078125,
|
12 |
+
"eval_rewards/rejected": -4.4375,
|
13 |
+
"eval_runtime": 23.576,
|
14 |
+
"eval_samples": 2000,
|
15 |
+
"eval_samples_per_second": 84.832,
|
16 |
+
"eval_steps_per_second": 1.357
|
17 |
+
}
|
runs/Sep20_04-14-32_xe8545-a100-29/events.out.tfevents.1726805549.xe8545-a100-29.124074.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77a99cbf71f6b576c069c77d96e88aa5ec00e6e4f01f07e4863a97144431278e
|
3 |
+
size 880
|
runs/Sep20_23-12-02_xe8545-a100-31/events.out.tfevents.1726867169.xe8545-a100-31.109672.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09db576e0b27ae0a2ac082d453f760a9ff6c682a555caa512c10dac78c564c3e
|
3 |
+
size 7015
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 2.998953427524856,
|
3 |
"total_flos": 0.0,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 61134,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second":
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.998953427524856,
|
3 |
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.0,
|
5 |
+
"train_runtime": 0.0505,
|
6 |
"train_samples": 61134,
|
7 |
+
"train_samples_per_second": 3630511.85,
|
8 |
+
"train_steps_per_second": 56713.757
|
9 |
}
|
trainer_state.json
CHANGED
@@ -5064,10 +5064,10 @@
|
|
5064 |
"epoch": 2.998953427524856,
|
5065 |
"step": 2865,
|
5066 |
"total_flos": 0.0,
|
5067 |
-
"train_loss": 0.
|
5068 |
-
"train_runtime":
|
5069 |
-
"train_samples_per_second":
|
5070 |
-
"train_steps_per_second":
|
5071 |
}
|
5072 |
],
|
5073 |
"logging_steps": 10,
|
|
|
5064 |
"epoch": 2.998953427524856,
|
5065 |
"step": 2865,
|
5066 |
"total_flos": 0.0,
|
5067 |
+
"train_loss": 0.0,
|
5068 |
+
"train_runtime": 0.0505,
|
5069 |
+
"train_samples_per_second": 3630511.85,
|
5070 |
+
"train_steps_per_second": 56713.757
|
5071 |
}
|
5072 |
],
|
5073 |
"logging_steps": 10,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7096
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e749e83dea30184efd92aea71e8b4ab47b6cf7fd4b5e008d92c123e73ae3e1ae
|
3 |
size 7096
|