CharlesLi committed on
Commit
8e8e7ca
1 Parent(s): bbb9d0c

Model save

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  tags:
4
  - trl
5
  - cpo
 
6
  - generated_from_trainer
7
  model-index:
8
  - name: OpenELM-1_1B-CPO
@@ -16,16 +17,16 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
 
 
 
 
19
  - Loss: 2.1904
20
- - Rewards/chosen: -3.6406
21
- - Rewards/rejected: -4.4375
22
  - Rewards/accuracies: 0.5918
 
23
  - Rewards/margins: 0.8008
24
- - Logps/rejected: -444.0
25
- - Logps/chosen: -364.0
26
- - Logits/rejected: -7.5312
27
- - Logits/chosen: -8.875
28
- - Nll Loss: 1.1719
29
 
30
  ## Model description
31
 
@@ -71,25 +72,25 @@ The following hyperparameters were used during training:
71
  | 2.2554 | 0.7326 | 700 | -9.6875 | -8.5625 | -374.0 | -416.0 | 2.1848 | 1.2031 | 0.5664 | -3.7344 | 0.4258 | -4.1562 |
72
  | 2.0796 | 0.8373 | 800 | -7.8438 | -7.0312 | -346.0 | -374.0 | 2.1224 | 1.1172 | 0.5469 | -3.4531 | 0.2852 | -3.75 |
73
  | 2.1021 | 0.9419 | 900 | -6.2812 | -5.2812 | -350.0 | -390.0 | 2.1099 | 1.1328 | 0.5723 | -3.5 | 0.4062 | -3.9062 |
74
- | 1.5182 | 1.0471 | 1000 | 2.1662 | -3.5 | -3.8594 | 0.5664 | 0.3633 | -386.0 | -350.0 | -9.375 | -10.625 | 1.125 |
75
- | 1.4917 | 1.1518 | 1100 | 2.1588 | -3.5625 | -4.0 | 0.5703 | 0.4395 | -400.0 | -356.0 | -6.4688 | -7.875 | 1.1484 |
76
- | 1.5219 | 1.2564 | 1200 | 2.1449 | -3.625 | -4.1875 | 0.5938 | 0.5586 | -420.0 | -364.0 | -6.6562 | -7.7812 | 1.1719 |
77
- | 1.5292 | 1.3611 | 1300 | 2.1489 | -3.5312 | -4.0 | 0.5742 | 0.4785 | -402.0 | -354.0 | -7.75 | -8.875 | 1.1406 |
78
- | 1.4257 | 1.4657 | 1400 | 2.1193 | -3.5781 | -4.0938 | 0.5801 | 0.5156 | -410.0 | -358.0 | -7.7188 | -9.25 | 1.1562 |
79
- | 1.4366 | 1.5704 | 1500 | 2.0983 | -3.5938 | -4.1562 | 0.5898 | 0.5586 | -416.0 | -358.0 | -7.6875 | -8.9375 | 1.1562 |
80
- | 1.5246 | 1.6750 | 1600 | 2.1191 | -3.5781 | -4.2188 | 0.5938 | 0.625 | -420.0 | -358.0 | -5.4688 | -6.9062 | 1.1562 |
81
- | 1.4534 | 1.7797 | 1700 | 2.0829 | -3.4688 | -4.0312 | 0.5762 | 0.5625 | -404.0 | -348.0 | -9.0625 | -10.0625 | 1.1172 |
82
- | 1.4551 | 1.8844 | 1800 | 2.1033 | -3.5625 | -4.1562 | 0.5898 | 0.6016 | -416.0 | -356.0 | -6.8438 | -8.1875 | 1.1484 |
83
- | 1.4969 | 1.9890 | 1900 | 2.1046 | -3.5312 | -4.125 | 0.5762 | 0.5938 | -412.0 | -354.0 | -8.125 | -9.3125 | 1.1406 |
84
- | 0.9984 | 2.0937 | 2000 | 2.1806 | -3.6406 | -4.2812 | 0.5781 | 0.6367 | -428.0 | -364.0 | -7.9375 | -9.1875 | 1.1719 |
85
- | 0.9885 | 2.1983 | 2100 | 2.1927 | -3.6875 | -4.5 | 0.5801 | 0.7930 | -448.0 | -370.0 | -7.4062 | -8.6875 | 1.1875 |
86
- | 0.9814 | 2.3030 | 2200 | 2.1867 | -3.625 | -4.3438 | 0.5742 | 0.7266 | -436.0 | -362.0 | -7.5 | -8.8125 | 1.1719 |
87
- | 0.9844 | 2.4076 | 2300 | 2.1905 | -3.6875 | -4.5312 | 0.5996 | 0.8438 | -452.0 | -368.0 | -7.125 | -8.375 | 1.1875 |
88
- | 0.9931 | 2.5123 | 2400 | 2.1843 | -3.6406 | -4.4375 | 0.5820 | 0.7930 | -442.0 | -364.0 | -7.375 | -8.6875 | 1.1719 |
89
- | 0.9537 | 2.6170 | 2500 | 2.1907 | -3.6406 | -4.4688 | 0.5898 | 0.8125 | -446.0 | -364.0 | -7.5 | -8.8125 | 1.1719 |
90
- | 0.9512 | 2.7216 | 2600 | 2.1918 | -3.6406 | -4.4375 | 0.5898 | 0.8086 | -446.0 | -364.0 | -7.5 | -8.8125 | 1.1719 |
91
- | 0.9604 | 2.8263 | 2700 | 2.1906 | -3.6406 | -4.4375 | 0.5879 | 0.7969 | -442.0 | -364.0 | -7.5312 | -8.875 | 1.1719 |
92
- | 1.0208 | 2.9309 | 2800 | 2.1904 | -3.6406 | -4.4375 | 0.5918 | 0.8008 | -444.0 | -364.0 | -7.5312 | -8.875 | 1.1719 |
93
 
94
 
95
  ### Framework versions
 
3
  tags:
4
  - trl
5
  - cpo
6
+ - alignment-handbook
7
  - generated_from_trainer
8
  model-index:
9
  - name: OpenELM-1_1B-CPO
 
17
 
18
  This model was trained from scratch on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Logits/chosen: -8.875
21
+ - Logits/rejected: -7.5312
22
+ - Logps/chosen: -364.0
23
+ - Logps/rejected: -444.0
24
  - Loss: 2.1904
25
+ - Nll Loss: 1.1719
 
26
  - Rewards/accuracies: 0.5918
27
+ - Rewards/chosen: -3.6406
28
  - Rewards/margins: 0.8008
29
+ - Rewards/rejected: -4.4375
 
 
 
 
30
 
31
  ## Model description
32
 
 
72
  | 2.2554 | 0.7326 | 700 | -9.6875 | -8.5625 | -374.0 | -416.0 | 2.1848 | 1.2031 | 0.5664 | -3.7344 | 0.4258 | -4.1562 |
73
  | 2.0796 | 0.8373 | 800 | -7.8438 | -7.0312 | -346.0 | -374.0 | 2.1224 | 1.1172 | 0.5469 | -3.4531 | 0.2852 | -3.75 |
74
  | 2.1021 | 0.9419 | 900 | -6.2812 | -5.2812 | -350.0 | -390.0 | 2.1099 | 1.1328 | 0.5723 | -3.5 | 0.4062 | -3.9062 |
75
+ | 1.5182 | 1.0471 | 1000 | -10.625 | -9.375 | -350.0 | -386.0 | 2.1662 | 1.125 | 0.5664 | -3.5 | 0.3633 | -3.8594 |
76
+ | 1.4917 | 1.1518 | 1100 | -7.875 | -6.4688 | -356.0 | -400.0 | 2.1588 | 1.1484 | 0.5703 | -3.5625 | 0.4395 | -4.0 |
77
+ | 1.5219 | 1.2564 | 1200 | -7.7812 | -6.6562 | -364.0 | -420.0 | 2.1449 | 1.1719 | 0.5938 | -3.625 | 0.5586 | -4.1875 |
78
+ | 1.5292 | 1.3611 | 1300 | -8.875 | -7.75 | -354.0 | -402.0 | 2.1489 | 1.1406 | 0.5742 | -3.5312 | 0.4785 | -4.0 |
79
+ | 1.4257 | 1.4657 | 1400 | -9.25 | -7.7188 | -358.0 | -410.0 | 2.1193 | 1.1562 | 0.5801 | -3.5781 | 0.5156 | -4.0938 |
80
+ | 1.4366 | 1.5704 | 1500 | -8.9375 | -7.6875 | -358.0 | -416.0 | 2.0983 | 1.1562 | 0.5898 | -3.5938 | 0.5586 | -4.1562 |
81
+ | 1.5246 | 1.6750 | 1600 | -6.9062 | -5.4688 | -358.0 | -420.0 | 2.1191 | 1.1562 | 0.5938 | -3.5781 | 0.625 | -4.2188 |
82
+ | 1.4534 | 1.7797 | 1700 | -10.0625 | -9.0625 | -348.0 | -404.0 | 2.0829 | 1.1172 | 0.5762 | -3.4688 | 0.5625 | -4.0312 |
83
+ | 1.4551 | 1.8844 | 1800 | -8.1875 | -6.8438 | -356.0 | -416.0 | 2.1033 | 1.1484 | 0.5898 | -3.5625 | 0.6016 | -4.1562 |
84
+ | 1.4969 | 1.9890 | 1900 | -9.3125 | -8.125 | -354.0 | -412.0 | 2.1046 | 1.1406 | 0.5762 | -3.5312 | 0.5938 | -4.125 |
85
+ | 0.9984 | 2.0937 | 2000 | -9.1875 | -7.9375 | -364.0 | -428.0 | 2.1806 | 1.1719 | 0.5781 | -3.6406 | 0.6367 | -4.2812 |
86
+ | 0.9885 | 2.1983 | 2100 | -8.6875 | -7.4062 | -370.0 | -448.0 | 2.1927 | 1.1875 | 0.5801 | -3.6875 | 0.7930 | -4.5 |
87
+ | 0.9814 | 2.3030 | 2200 | -8.8125 | -7.5 | -362.0 | -436.0 | 2.1867 | 1.1719 | 0.5742 | -3.625 | 0.7266 | -4.3438 |
88
+ | 0.9844 | 2.4076 | 2300 | -8.375 | -7.125 | -368.0 | -452.0 | 2.1905 | 1.1875 | 0.5996 | -3.6875 | 0.8438 | -4.5312 |
89
+ | 0.9931 | 2.5123 | 2400 | -8.6875 | -7.375 | -364.0 | -442.0 | 2.1843 | 1.1719 | 0.5820 | -3.6406 | 0.7930 | -4.4375 |
90
+ | 0.9537 | 2.6170 | 2500 | -8.8125 | -7.5 | -364.0 | -446.0 | 2.1907 | 1.1719 | 0.5898 | -3.6406 | 0.8125 | -4.4688 |
91
+ | 0.9512 | 2.7216 | 2600 | -8.8125 | -7.5 | -364.0 | -446.0 | 2.1918 | 1.1719 | 0.5898 | -3.6406 | 0.8086 | -4.4375 |
92
+ | 0.9604 | 2.8263 | 2700 | -8.875 | -7.5312 | -364.0 | -442.0 | 2.1906 | 1.1719 | 0.5879 | -3.6406 | 0.7969 | -4.4375 |
93
+ | 1.0208 | 2.9309 | 2800 | -8.875 | -7.5312 | -364.0 | -444.0 | 2.1904 | 1.1719 | 0.5918 | -3.6406 | 0.8008 | -4.4375 |
94
 
95
 
96
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,23 @@
1
  {
2
  "epoch": 2.998953427524856,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
- "train_loss": 0.819025840559555,
5
- "train_runtime": 6434.1562,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 28.504,
8
- "train_steps_per_second": 0.445
9
  }
 
1
  {
2
  "epoch": 2.998953427524856,
3
+ "eval_logits/chosen": -8.875,
4
+ "eval_logits/rejected": -7.5625,
5
+ "eval_logps/chosen": -364.0,
6
+ "eval_logps/rejected": -444.0,
7
+ "eval_loss": 2.1908750534057617,
8
+ "eval_nll_loss": 1.171875,
9
+ "eval_rewards/accuracies": 0.59375,
10
+ "eval_rewards/chosen": -3.640625,
11
+ "eval_rewards/margins": 0.80078125,
12
+ "eval_rewards/rejected": -4.4375,
13
+ "eval_runtime": 23.576,
14
+ "eval_samples": 2000,
15
+ "eval_samples_per_second": 84.832,
16
+ "eval_steps_per_second": 1.357,
17
  "total_flos": 0.0,
18
+ "train_loss": 0.0,
19
+ "train_runtime": 0.0505,
20
  "train_samples": 61134,
21
+ "train_samples_per_second": 3630511.85,
22
+ "train_steps_per_second": 56713.757
23
  }
eval_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.998953427524856,
3
+ "eval_logits/chosen": -8.875,
4
+ "eval_logits/rejected": -7.5625,
5
+ "eval_logps/chosen": -364.0,
6
+ "eval_logps/rejected": -444.0,
7
+ "eval_loss": 2.1908750534057617,
8
+ "eval_nll_loss": 1.171875,
9
+ "eval_rewards/accuracies": 0.59375,
10
+ "eval_rewards/chosen": -3.640625,
11
+ "eval_rewards/margins": 0.80078125,
12
+ "eval_rewards/rejected": -4.4375,
13
+ "eval_runtime": 23.576,
14
+ "eval_samples": 2000,
15
+ "eval_samples_per_second": 84.832,
16
+ "eval_steps_per_second": 1.357
17
+ }
runs/Sep20_04-14-32_xe8545-a100-29/events.out.tfevents.1726805549.xe8545-a100-29.124074.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a99cbf71f6b576c069c77d96e88aa5ec00e6e4f01f07e4863a97144431278e
3
+ size 880
runs/Sep20_23-12-02_xe8545-a100-31/events.out.tfevents.1726867169.xe8545-a100-31.109672.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09db576e0b27ae0a2ac082d453f760a9ff6c682a555caa512c10dac78c564c3e
3
+ size 7015
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 2.998953427524856,
3
  "total_flos": 0.0,
4
- "train_loss": 0.819025840559555,
5
- "train_runtime": 6434.1562,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 28.504,
8
- "train_steps_per_second": 0.445
9
  }
 
1
  {
2
  "epoch": 2.998953427524856,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 0.0505,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3630511.85,
8
+ "train_steps_per_second": 56713.757
9
  }
trainer_state.json CHANGED
@@ -5064,10 +5064,10 @@
5064
  "epoch": 2.998953427524856,
5065
  "step": 2865,
5066
  "total_flos": 0.0,
5067
- "train_loss": 0.819025840559555,
5068
- "train_runtime": 6434.1562,
5069
- "train_samples_per_second": 28.504,
5070
- "train_steps_per_second": 0.445
5071
  }
5072
  ],
5073
  "logging_steps": 10,
 
5064
  "epoch": 2.998953427524856,
5065
  "step": 2865,
5066
  "total_flos": 0.0,
5067
+ "train_loss": 0.0,
5068
+ "train_runtime": 0.0505,
5069
+ "train_samples_per_second": 3630511.85,
5070
+ "train_steps_per_second": 56713.757
5071
  }
5072
  ],
5073
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c3a99a93ab410a845d8aecc75060557b05454f628905bfe08b3a41d17603a45
3
  size 7096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e749e83dea30184efd92aea71e8b4ab47b6cf7fd4b5e008d92c123e73ae3e1ae
3
  size 7096