CharlesLi committed on
Commit
bb7f0c0
1 Parent(s): a1c0da7

Model save

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  tags:
4
  - trl
5
  - kto
 
6
  - generated_from_trainer
7
  model-index:
8
  - name: OpenELM-1_1B-KTO
@@ -16,15 +17,15 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.4491
20
- - Rewards/chosen: -2.1686
21
- - Logps/chosen: -548.5443
22
- - Rewards/rejected: -5.4545
23
- - Logps/rejected: -840.7200
24
- - Rewards/margins: 3.2859
25
  - Kl: 0.0
26
  - Logits/chosen: -1428926080.0
27
  - Logits/rejected: -1154165888.0
 
 
 
 
 
 
28
 
29
  ## Model description
30
 
@@ -59,65 +60,65 @@ The following hyperparameters were used during training:
59
 
60
  ### Training results
61
 
62
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Logps/chosen | Rewards/rejected | Logps/rejected | Rewards/margins | Kl | Logits/chosen | Logits/rejected |
63
- |:-------------:|:------:|:----:|:---------------:|:--------------:|:------------:|:----------------:|:--------------:|:---------------:|:---:|:-------------:|:---------------:|
64
- | 0.4859 | 0.0523 | 100 | 0.4855 | -0.6136 | -393.0411 | -0.7551 | -370.7767 | 0.1415 | 0.0 | -3175687424.0 | -3128284928.0 |
65
- | 0.4556 | 0.1047 | 200 | 0.4658 | -1.2023 | -452.2593 | -1.9602 | -491.0910 | 0.7579 | 0.0 | -3518155520.0 | -3502177280.0 |
66
- | 0.4658 | 0.1570 | 300 | 0.4630 | -0.8430 | -416.2217 | -1.4462 | -439.7414 | 0.6032 | 0.0 | -3454242304.0 | -3428943360.0 |
67
- | 0.4543 | 0.2094 | 400 | 0.4728 | -2.3096 | -562.5413 | -2.9453 | -589.5574 | 0.6357 | 0.0 | -3808428032.0 | -3728536576.0 |
68
- | 0.4445 | 0.2617 | 500 | 0.4657 | -2.0271 | -534.3256 | -2.9015 | -585.3565 | 0.8745 | 0.0 | -3128418048.0 | -2977955840.0 |
69
- | 0.4654 | 0.3141 | 600 | 0.4658 | -1.9723 | -529.1321 | -2.8686 | -582.2695 | 0.8963 | 0.0 | -3596449280.0 | -3444389120.0 |
70
- | 0.4517 | 0.3664 | 700 | 0.4690 | -1.2099 | -452.7501 | -1.9723 | -492.3958 | 0.7624 | 0.0 | -3631068928.0 | -3583666688.0 |
71
- | 0.4701 | 0.4187 | 800 | 0.4595 | -2.1877 | -550.3810 | -3.6176 | -656.9627 | 1.4299 | 0.0 | -3269426688.0 | -3207910144.0 |
72
- | 0.4711 | 0.4711 | 900 | 0.4626 | -2.6066 | -592.4354 | -4.6290 | -758.0388 | 2.0224 | 0.0 | -3022695168.0 | -3008713984.0 |
73
- | 0.4534 | 0.5234 | 1000 | 0.4594 | -2.1704 | -548.9560 | -3.6118 | -656.3580 | 1.4414 | 0.0 | -2621240320.0 | -2359728640.0 |
74
- | 0.4428 | 0.5758 | 1100 | 0.4583 | -2.5622 | -587.9386 | -4.6416 | -759.0254 | 2.0794 | 0.0 | -2962243840.0 | -2838944512.0 |
75
- | 0.4619 | 0.6281 | 1200 | 0.4549 | -2.3843 | -570.1098 | -4.4734 | -742.3173 | 2.0891 | 0.0 | -2887944704.0 | -2875961088.0 |
76
- | 0.4627 | 0.6805 | 1300 | 0.4565 | -3.7379 | -705.3934 | -7.4207 | -1037.3665 | 3.6828 | 0.0 | -2942404096.0 | -2700332800.0 |
77
- | 0.4622 | 0.7328 | 1400 | 0.4620 | -4.5181 | -783.2637 | -7.9366 | -1089.1775 | 3.4185 | 0.0 | -3010711296.0 | -2812979968.0 |
78
- | 0.4571 | 0.7851 | 1500 | 0.4572 | -2.3622 | -567.8931 | -4.7849 | -773.6330 | 2.4227 | 0.0 | -2759985152.0 | -2638283776.0 |
79
- | 0.4714 | 0.8375 | 1600 | 0.4618 | -1.9416 | -526.0604 | -3.6785 | -662.9458 | 1.7369 | 0.0 | -2749732352.0 | -2697137152.0 |
80
- | 0.4266 | 0.8898 | 1700 | 0.4548 | -2.2239 | -554.2761 | -3.8812 | -683.2819 | 1.6573 | 0.0 | -1904080896.0 | -1675191680.0 |
81
- | 0.449 | 0.9422 | 1800 | 0.4612 | -2.9899 | -630.7531 | -4.8538 | -780.4754 | 1.8639 | 0.0 | -2063198080.0 | -1781980032.0 |
82
- | 0.4773 | 0.9945 | 1900 | 0.4612 | -3.2661 | -658.1771 | -4.9806 | -792.9826 | 1.7145 | 0.0 | -2405932544.0 | -2133635840.0 |
83
- | 0.4654 | 1.0468 | 2000 | 0.4568 | -1.9690 | -528.9421 | -3.2144 | -616.7041 | 1.2455 | 0.0 | -2678495744.0 | -2597272832.0 |
84
- | 0.4228 | 1.0992 | 2100 | 0.4540 | -2.7379 | -605.4824 | -5.3262 | -827.4172 | 2.5883 | 0.0 | -1897423232.0 | -1554289280.0 |
85
- | 0.4094 | 1.1515 | 2200 | 0.4561 | -1.6318 | -495.0579 | -2.9029 | -585.3565 | 1.2711 | 0.0 | -1938966784.0 | -1658281344.0 |
86
- | 0.3779 | 1.2039 | 2300 | 0.4564 | -1.8295 | -514.6284 | -3.3256 | -627.5883 | 1.4960 | 0.0 | -2399674624.0 | -2125380352.0 |
87
- | 0.3731 | 1.2562 | 2400 | 0.4578 | -1.6493 | -496.7679 | -3.1023 | -605.2153 | 1.4530 | 0.0 | -1886105216.0 | -1661210752.0 |
88
- | 0.3538 | 1.3086 | 2500 | 0.4578 | -1.8841 | -520.2969 | -3.4630 | -641.4639 | 1.5789 | 0.0 | -1523131520.0 | -1173539584.0 |
89
- | 0.3699 | 1.3609 | 2600 | 0.4552 | -1.3250 | -464.2454 | -2.7633 | -571.5127 | 1.4383 | 0.0 | -2620041984.0 | -2228706560.0 |
90
- | 0.3293 | 1.4132 | 2700 | 0.4508 | -2.2760 | -559.4696 | -5.0990 | -805.1397 | 2.8229 | 0.0 | -1547698176.0 | -1145178112.0 |
91
- | 0.3376 | 1.4656 | 2800 | 0.4576 | -1.6388 | -495.7229 | -3.4841 | -643.6280 | 1.8453 | 0.0 | -1543770240.0 | -1201035648.0 |
92
- | 0.3545 | 1.5179 | 2900 | 0.4523 | -4.0433 | -736.2058 | -8.7166 | -1167.2123 | 4.6733 | 0.0 | -1516407296.0 | -1264482816.0 |
93
- | 0.3399 | 1.5703 | 3000 | 0.4497 | -3.0224 | -634.2682 | -6.8772 | -983.0731 | 3.8548 | 0.0 | -2308331776.0 | -2173847808.0 |
94
- | 0.3429 | 1.6226 | 3100 | 0.4497 | -3.1008 | -641.7100 | -6.5060 | -945.9015 | 3.4052 | 0.0 | -2065728000.0 | -1775322368.0 |
95
- | 0.3005 | 1.6750 | 3200 | 0.4492 | -1.8422 | -515.8318 | -4.2420 | -719.3396 | 2.3998 | 0.0 | -2172250112.0 | -2024051328.0 |
96
- | 0.3468 | 1.7273 | 3300 | 0.4503 | -3.1821 | -650.1019 | -8.0773 | -1102.9896 | 4.8952 | 0.0 | -2299277568.0 | -2052279552.0 |
97
- | 0.3361 | 1.7796 | 3400 | 0.4488 | -2.5504 | -586.8936 | -6.5818 | -953.3486 | 4.0314 | 0.0 | -1953080960.0 | -1726854912.0 |
98
- | 0.3405 | 1.8320 | 3500 | 0.4473 | -2.6417 | -595.8555 | -6.2223 | -917.2273 | 3.5806 | 0.0 | -1600359936.0 | -1368940928.0 |
99
- | 0.362 | 1.8843 | 3600 | 0.4472 | -2.6312 | -594.8738 | -6.3789 | -933.2352 | 3.7478 | 0.0 | -1622263552.0 | -1444837888.0 |
100
- | 0.3153 | 1.9367 | 3700 | 0.4482 | -2.1611 | -548.1009 | -5.3362 | -828.2765 | 3.1750 | 0.0 | -1449431680.0 | -1236653952.0 |
101
- | 0.332 | 1.9890 | 3800 | 0.4465 | -1.8534 | -517.2568 | -4.5532 | -750.3690 | 2.6999 | 0.0 | -1527192704.0 | -1300766848.0 |
102
- | 0.3281 | 2.0414 | 3900 | 0.4463 | -2.6517 | -596.9639 | -6.5944 | -954.6852 | 3.9427 | 0.0 | -1679585792.0 | -1427328256.0 |
103
- | 0.3181 | 2.0937 | 4000 | 0.4486 | -1.9321 | -525.1103 | -4.3169 | -727.0094 | 2.3847 | 0.0 | -1560081408.0 | -1263351040.0 |
104
- | 0.2603 | 2.1460 | 4100 | 0.4512 | -1.9273 | -524.6037 | -4.2649 | -721.5356 | 2.3376 | 0.0 | -1459418112.0 | -1138054528.0 |
105
- | 0.2388 | 2.1984 | 4200 | 0.4556 | -1.5242 | -484.2593 | -3.1844 | -613.2352 | 1.6601 | 0.0 | -1377462656.0 | -1113288064.0 |
106
- | 0.224 | 2.2507 | 4300 | 0.4576 | -1.4813 | -479.9842 | -2.9584 | -590.8304 | 1.4770 | 0.0 | -1391976320.0 | -1133061248.0 |
107
- | 0.26 | 2.3031 | 4400 | 0.4562 | -1.6511 | -496.7046 | -3.3839 | -633.5714 | 1.7328 | 0.0 | -1215682432.0 | -961427712.0 |
108
- | 0.2234 | 2.3554 | 4500 | 0.4557 | -2.0918 | -540.8808 | -4.4056 | -735.6340 | 2.3138 | 0.0 | -1145577600.0 | -884432256.0 |
109
- | 0.235 | 2.4077 | 4600 | 0.4559 | -1.5702 | -488.9302 | -3.5613 | -651.3615 | 1.9911 | 0.0 | -1404625792.0 | -1132328960.0 |
110
- | 0.2246 | 2.4601 | 4700 | 0.4523 | -1.7423 | -506.0782 | -3.9668 | -691.9065 | 2.2245 | 0.0 | -1378527872.0 | -1094446976.0 |
111
- | 0.24 | 2.5124 | 4800 | 0.4519 | -1.7776 | -509.5299 | -4.0614 | -701.2312 | 2.2838 | 0.0 | -1400231808.0 | -1111490560.0 |
112
- | 0.2557 | 2.5648 | 4900 | 0.4514 | -1.9623 | -528.5304 | -4.5403 | -749.2869 | 2.5779 | 0.0 | -1397235840.0 | -1123008256.0 |
113
- | 0.2413 | 2.6171 | 5000 | 0.4506 | -1.9521 | -527.0421 | -4.7608 | -771.0870 | 2.8087 | 0.0 | -1477659904.0 | -1194644352.0 |
114
- | 0.2273 | 2.6695 | 5100 | 0.4495 | -1.9271 | -524.3503 | -4.5880 | -753.7106 | 2.6609 | 0.0 | -1420737280.0 | -1144246016.0 |
115
- | 0.2645 | 2.7218 | 5200 | 0.4501 | -1.9730 | -529.0688 | -4.7279 | -767.9682 | 2.7549 | 0.0 | -1426662528.0 | -1147708032.0 |
116
- | 0.2637 | 2.7741 | 5300 | 0.4497 | -2.1932 | -551.0460 | -5.4078 | -835.7553 | 3.2146 | 0.0 | -1414479104.0 | -1139319424.0 |
117
- | 0.2683 | 2.8265 | 5400 | 0.4496 | -2.2117 | -552.9777 | -5.5303 | -847.9125 | 3.3187 | 0.0 | -1420271232.0 | -1144445824.0 |
118
- | 0.2551 | 2.8788 | 5500 | 0.4494 | -2.1969 | -551.4577 | -5.5093 | -846.3531 | 3.3124 | 0.0 | -1425663872.0 | -1148307200.0 |
119
- | 0.2695 | 2.9312 | 5600 | 0.4490 | -2.1721 | -548.8293 | -5.4591 | -841.0065 | 3.2870 | 0.0 | -1428859520.0 | -1154099328.0 |
120
- | 0.2664 | 2.9835 | 5700 | 0.4491 | -2.1686 | -548.5443 | -5.4545 | -840.7200 | 3.2859 | 0.0 | -1428926080.0 | -1154165888.0 |
121
 
122
 
123
  ### Framework versions
 
3
  tags:
4
  - trl
5
  - kto
6
+ - alignment-handbook
7
  - generated_from_trainer
8
  model-index:
9
  - name: OpenELM-1_1B-KTO
 
17
 
18
  This model was trained from scratch on the None dataset.
19
  It achieves the following results on the evaluation set:
 
 
 
 
 
 
20
  - Kl: 0.0
21
  - Logits/chosen: -1428926080.0
22
  - Logits/rejected: -1154165888.0
23
+ - Logps/chosen: -548.5443
24
+ - Logps/rejected: -840.7200
25
+ - Loss: 0.4491
26
+ - Rewards/chosen: -2.1686
27
+ - Rewards/margins: 3.2859
28
+ - Rewards/rejected: -5.4545
29
 
30
  ## Model description
31
 
 
60
 
61
  ### Training results
62
 
63
+ | Training Loss | Epoch | Step | Kl | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/chosen | Rewards/margins | Rewards/rejected |
64
+ |:-------------:|:------:|:----:|:---:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:--------------:|:---------------:|:----------------:|
65
+ | 0.4859 | 0.0523 | 100 | 0.0 | -3175687424.0 | -3128284928.0 | -393.0411 | -370.7767 | 0.4855 | -0.6136 | 0.1415 | -0.7551 |
66
+ | 0.4556 | 0.1047 | 200 | 0.0 | -3518155520.0 | -3502177280.0 | -452.2593 | -491.0910 | 0.4658 | -1.2023 | 0.7579 | -1.9602 |
67
+ | 0.4658 | 0.1570 | 300 | 0.0 | -3454242304.0 | -3428943360.0 | -416.2217 | -439.7414 | 0.4630 | -0.8430 | 0.6032 | -1.4462 |
68
+ | 0.4543 | 0.2094 | 400 | 0.0 | -3808428032.0 | -3728536576.0 | -562.5413 | -589.5574 | 0.4728 | -2.3096 | 0.6357 | -2.9453 |
69
+ | 0.4445 | 0.2617 | 500 | 0.0 | -3128418048.0 | -2977955840.0 | -534.3256 | -585.3565 | 0.4657 | -2.0271 | 0.8745 | -2.9015 |
70
+ | 0.4654 | 0.3141 | 600 | 0.0 | -3596449280.0 | -3444389120.0 | -529.1321 | -582.2695 | 0.4658 | -1.9723 | 0.8963 | -2.8686 |
71
+ | 0.4517 | 0.3664 | 700 | 0.0 | -3631068928.0 | -3583666688.0 | -452.7501 | -492.3958 | 0.4690 | -1.2099 | 0.7624 | -1.9723 |
72
+ | 0.4701 | 0.4187 | 800 | 0.0 | -3269426688.0 | -3207910144.0 | -550.3810 | -656.9627 | 0.4595 | -2.1877 | 1.4299 | -3.6176 |
73
+ | 0.4711 | 0.4711 | 900 | 0.0 | -3022695168.0 | -3008713984.0 | -592.4354 | -758.0388 | 0.4626 | -2.6066 | 2.0224 | -4.6290 |
74
+ | 0.4534 | 0.5234 | 1000 | 0.0 | -2621240320.0 | -2359728640.0 | -548.9560 | -656.3580 | 0.4594 | -2.1704 | 1.4414 | -3.6118 |
75
+ | 0.4428 | 0.5758 | 1100 | 0.0 | -2962243840.0 | -2838944512.0 | -587.9386 | -759.0254 | 0.4583 | -2.5622 | 2.0794 | -4.6416 |
76
+ | 0.4619 | 0.6281 | 1200 | 0.0 | -2887944704.0 | -2875961088.0 | -570.1098 | -742.3173 | 0.4549 | -2.3843 | 2.0891 | -4.4734 |
77
+ | 0.4627 | 0.6805 | 1300 | 0.0 | -2942404096.0 | -2700332800.0 | -705.3934 | -1037.3665 | 0.4565 | -3.7379 | 3.6828 | -7.4207 |
78
+ | 0.4622 | 0.7328 | 1400 | 0.0 | -3010711296.0 | -2812979968.0 | -783.2637 | -1089.1775 | 0.4620 | -4.5181 | 3.4185 | -7.9366 |
79
+ | 0.4571 | 0.7851 | 1500 | 0.0 | -2759985152.0 | -2638283776.0 | -567.8931 | -773.6330 | 0.4572 | -2.3622 | 2.4227 | -4.7849 |
80
+ | 0.4714 | 0.8375 | 1600 | 0.0 | -2749732352.0 | -2697137152.0 | -526.0604 | -662.9458 | 0.4618 | -1.9416 | 1.7369 | -3.6785 |
81
+ | 0.4266 | 0.8898 | 1700 | 0.0 | -1904080896.0 | -1675191680.0 | -554.2761 | -683.2819 | 0.4548 | -2.2239 | 1.6573 | -3.8812 |
82
+ | 0.449 | 0.9422 | 1800 | 0.0 | -2063198080.0 | -1781980032.0 | -630.7531 | -780.4754 | 0.4612 | -2.9899 | 1.8639 | -4.8538 |
83
+ | 0.4773 | 0.9945 | 1900 | 0.0 | -2405932544.0 | -2133635840.0 | -658.1771 | -792.9826 | 0.4612 | -3.2661 | 1.7145 | -4.9806 |
84
+ | 0.4654 | 1.0468 | 2000 | 0.0 | -2678495744.0 | -2597272832.0 | -528.9421 | -616.7041 | 0.4568 | -1.9690 | 1.2455 | -3.2144 |
85
+ | 0.4228 | 1.0992 | 2100 | 0.0 | -1897423232.0 | -1554289280.0 | -605.4824 | -827.4172 | 0.4540 | -2.7379 | 2.5883 | -5.3262 |
86
+ | 0.4094 | 1.1515 | 2200 | 0.0 | -1938966784.0 | -1658281344.0 | -495.0579 | -585.3565 | 0.4561 | -1.6318 | 1.2711 | -2.9029 |
87
+ | 0.3779 | 1.2039 | 2300 | 0.0 | -2399674624.0 | -2125380352.0 | -514.6284 | -627.5883 | 0.4564 | -1.8295 | 1.4960 | -3.3256 |
88
+ | 0.3731 | 1.2562 | 2400 | 0.0 | -1886105216.0 | -1661210752.0 | -496.7679 | -605.2153 | 0.4578 | -1.6493 | 1.4530 | -3.1023 |
89
+ | 0.3538 | 1.3086 | 2500 | 0.0 | -1523131520.0 | -1173539584.0 | -520.2969 | -641.4639 | 0.4578 | -1.8841 | 1.5789 | -3.4630 |
90
+ | 0.3699 | 1.3609 | 2600 | 0.0 | -2620041984.0 | -2228706560.0 | -464.2454 | -571.5127 | 0.4552 | -1.3250 | 1.4383 | -2.7633 |
91
+ | 0.3293 | 1.4132 | 2700 | 0.0 | -1547698176.0 | -1145178112.0 | -559.4696 | -805.1397 | 0.4508 | -2.2760 | 2.8229 | -5.0990 |
92
+ | 0.3376 | 1.4656 | 2800 | 0.0 | -1543770240.0 | -1201035648.0 | -495.7229 | -643.6280 | 0.4576 | -1.6388 | 1.8453 | -3.4841 |
93
+ | 0.3545 | 1.5179 | 2900 | 0.0 | -1516407296.0 | -1264482816.0 | -736.2058 | -1167.2123 | 0.4523 | -4.0433 | 4.6733 | -8.7166 |
94
+ | 0.3399 | 1.5703 | 3000 | 0.0 | -2308331776.0 | -2173847808.0 | -634.2682 | -983.0731 | 0.4497 | -3.0224 | 3.8548 | -6.8772 |
95
+ | 0.3429 | 1.6226 | 3100 | 0.0 | -2065728000.0 | -1775322368.0 | -641.7100 | -945.9015 | 0.4497 | -3.1008 | 3.4052 | -6.5060 |
96
+ | 0.3005 | 1.6750 | 3200 | 0.0 | -2172250112.0 | -2024051328.0 | -515.8318 | -719.3396 | 0.4492 | -1.8422 | 2.3998 | -4.2420 |
97
+ | 0.3468 | 1.7273 | 3300 | 0.0 | -2299277568.0 | -2052279552.0 | -650.1019 | -1102.9896 | 0.4503 | -3.1821 | 4.8952 | -8.0773 |
98
+ | 0.3361 | 1.7796 | 3400 | 0.0 | -1953080960.0 | -1726854912.0 | -586.8936 | -953.3486 | 0.4488 | -2.5504 | 4.0314 | -6.5818 |
99
+ | 0.3405 | 1.8320 | 3500 | 0.0 | -1600359936.0 | -1368940928.0 | -595.8555 | -917.2273 | 0.4473 | -2.6417 | 3.5806 | -6.2223 |
100
+ | 0.362 | 1.8843 | 3600 | 0.0 | -1622263552.0 | -1444837888.0 | -594.8738 | -933.2352 | 0.4472 | -2.6312 | 3.7478 | -6.3789 |
101
+ | 0.3153 | 1.9367 | 3700 | 0.0 | -1449431680.0 | -1236653952.0 | -548.1009 | -828.2765 | 0.4482 | -2.1611 | 3.1750 | -5.3362 |
102
+ | 0.332 | 1.9890 | 3800 | 0.0 | -1527192704.0 | -1300766848.0 | -517.2568 | -750.3690 | 0.4465 | -1.8534 | 2.6999 | -4.5532 |
103
+ | 0.3281 | 2.0414 | 3900 | 0.0 | -1679585792.0 | -1427328256.0 | -596.9639 | -954.6852 | 0.4463 | -2.6517 | 3.9427 | -6.5944 |
104
+ | 0.3181 | 2.0937 | 4000 | 0.0 | -1560081408.0 | -1263351040.0 | -525.1103 | -727.0094 | 0.4486 | -1.9321 | 2.3847 | -4.3169 |
105
+ | 0.2603 | 2.1460 | 4100 | 0.0 | -1459418112.0 | -1138054528.0 | -524.6037 | -721.5356 | 0.4512 | -1.9273 | 2.3376 | -4.2649 |
106
+ | 0.2388 | 2.1984 | 4200 | 0.0 | -1377462656.0 | -1113288064.0 | -484.2593 | -613.2352 | 0.4556 | -1.5242 | 1.6601 | -3.1844 |
107
+ | 0.224 | 2.2507 | 4300 | 0.0 | -1391976320.0 | -1133061248.0 | -479.9842 | -590.8304 | 0.4576 | -1.4813 | 1.4770 | -2.9584 |
108
+ | 0.26 | 2.3031 | 4400 | 0.0 | -1215682432.0 | -961427712.0 | -496.7046 | -633.5714 | 0.4562 | -1.6511 | 1.7328 | -3.3839 |
109
+ | 0.2234 | 2.3554 | 4500 | 0.0 | -1145577600.0 | -884432256.0 | -540.8808 | -735.6340 | 0.4557 | -2.0918 | 2.3138 | -4.4056 |
110
+ | 0.235 | 2.4077 | 4600 | 0.0 | -1404625792.0 | -1132328960.0 | -488.9302 | -651.3615 | 0.4559 | -1.5702 | 1.9911 | -3.5613 |
111
+ | 0.2246 | 2.4601 | 4700 | 0.0 | -1378527872.0 | -1094446976.0 | -506.0782 | -691.9065 | 0.4523 | -1.7423 | 2.2245 | -3.9668 |
112
+ | 0.24 | 2.5124 | 4800 | 0.0 | -1400231808.0 | -1111490560.0 | -509.5299 | -701.2312 | 0.4519 | -1.7776 | 2.2838 | -4.0614 |
113
+ | 0.2557 | 2.5648 | 4900 | 0.0 | -1397235840.0 | -1123008256.0 | -528.5304 | -749.2869 | 0.4514 | -1.9623 | 2.5779 | -4.5403 |
114
+ | 0.2413 | 2.6171 | 5000 | 0.0 | -1477659904.0 | -1194644352.0 | -527.0421 | -771.0870 | 0.4506 | -1.9521 | 2.8087 | -4.7608 |
115
+ | 0.2273 | 2.6695 | 5100 | 0.0 | -1420737280.0 | -1144246016.0 | -524.3503 | -753.7106 | 0.4495 | -1.9271 | 2.6609 | -4.5880 |
116
+ | 0.2645 | 2.7218 | 5200 | 0.0 | -1426662528.0 | -1147708032.0 | -529.0688 | -767.9682 | 0.4501 | -1.9730 | 2.7549 | -4.7279 |
117
+ | 0.2637 | 2.7741 | 5300 | 0.0 | -1414479104.0 | -1139319424.0 | -551.0460 | -835.7553 | 0.4497 | -2.1932 | 3.2146 | -5.4078 |
118
+ | 0.2683 | 2.8265 | 5400 | 0.0 | -1420271232.0 | -1144445824.0 | -552.9777 | -847.9125 | 0.4496 | -2.2117 | 3.3187 | -5.5303 |
119
+ | 0.2551 | 2.8788 | 5500 | 0.0 | -1425663872.0 | -1148307200.0 | -551.4577 | -846.3531 | 0.4494 | -2.1969 | 3.3124 | -5.5093 |
120
+ | 0.2695 | 2.9312 | 5600 | 0.0 | -1428859520.0 | -1154099328.0 | -548.8293 | -841.0065 | 0.4490 | -2.1721 | 3.2870 | -5.4591 |
121
+ | 0.2664 | 2.9835 | 5700 | 0.0 | -1428926080.0 | -1154165888.0 | -548.5443 | -840.7200 | 0.4491 | -2.1686 | 3.2859 | -5.4545 |
122
 
123
 
124
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,22 @@
1
  {
2
  "epoch": 2.999214865218529,
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
- "train_loss": 0.3599983148757908,
5
- "train_runtime": 21481.0681,
6
  "train_samples": 122269,
7
- "train_samples_per_second": 17.076,
8
- "train_steps_per_second": 0.267
9
  }
 
1
  {
2
  "epoch": 2.999214865218529,
3
+ "eval_kl": 0.0,
4
+ "eval_logits/chosen": -1429059328.0,
5
+ "eval_logits/rejected": -1154032768.0,
6
+ "eval_logps/chosen": -548.4809500247402,
7
+ "eval_logps/rejected": -840.9109895574342,
8
+ "eval_loss": 0.4489765763282776,
9
+ "eval_rewards/chosen": -2.1658832261256804,
10
+ "eval_rewards/margins": 3.285384799732102,
11
+ "eval_rewards/rejected": -5.451268025857782,
12
+ "eval_runtime": 93.4316,
13
+ "eval_samples": 4000,
14
+ "eval_samples_per_second": 42.812,
15
+ "eval_steps_per_second": 0.674,
16
  "total_flos": 0.0,
17
+ "train_loss": 0.0,
18
+ "train_runtime": 0.0431,
19
  "train_samples": 122269,
20
+ "train_samples_per_second": 8512981.493,
21
+ "train_steps_per_second": 132983.787
22
  }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.999214865218529,
3
+ "eval_kl": 0.0,
4
+ "eval_logits/chosen": -1429059328.0,
5
+ "eval_logits/rejected": -1154032768.0,
6
+ "eval_logps/chosen": -548.4809500247402,
7
+ "eval_logps/rejected": -840.9109895574342,
8
+ "eval_loss": 0.4489765763282776,
9
+ "eval_rewards/chosen": -2.1658832261256804,
10
+ "eval_rewards/margins": 3.285384799732102,
11
+ "eval_rewards/rejected": -5.451268025857782,
12
+ "eval_runtime": 93.4316,
13
+ "eval_samples": 4000,
14
+ "eval_samples_per_second": 42.812,
15
+ "eval_steps_per_second": 0.674
16
+ }
runs/Sep20_23-48-30_xe8545-a100-31/events.out.tfevents.1726891426.xe8545-a100-31.112488.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8350f75fa0efbc0f843a70d04946abe8f8b928b5f898456df8f479a6ef904fa
3
+ size 812
runs/Sep24_05-31-01_xe8545-a100-23/events.out.tfevents.1727149416.xe8545-a100-23.342034.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5531ea5efa47c27372c6c9bd8d823823a2bff5cc87405fe594543a2c22ef9aff
3
+ size 7193
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 2.999214865218529,
3
  "total_flos": 0.0,
4
- "train_loss": 0.3599983148757908,
5
- "train_runtime": 21481.0681,
6
  "train_samples": 122269,
7
- "train_samples_per_second": 17.076,
8
- "train_steps_per_second": 0.267
9
  }
 
1
  {
2
  "epoch": 2.999214865218529,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 0.0431,
6
  "train_samples": 122269,
7
+ "train_samples_per_second": 8512981.493,
8
+ "train_steps_per_second": 132983.787
9
  }
trainer_state.json CHANGED
@@ -9519,10 +9519,10 @@
9519
  "epoch": 2.999214865218529,
9520
  "step": 5730,
9521
  "total_flos": 0.0,
9522
- "train_loss": 0.3599983148757908,
9523
- "train_runtime": 21481.0681,
9524
- "train_samples_per_second": 17.076,
9525
- "train_steps_per_second": 0.267
9526
  }
9527
  ],
9528
  "logging_steps": 10,
 
9519
  "epoch": 2.999214865218529,
9520
  "step": 5730,
9521
  "total_flos": 0.0,
9522
+ "train_loss": 0.0,
9523
+ "train_runtime": 0.0431,
9524
+ "train_samples_per_second": 8512981.493,
9525
+ "train_steps_per_second": 132983.787
9526
  }
9527
  ],
9528
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23ee1e89df459fd3b9a2bd1500dc2ab8d606972995f9a11dc189ab61f2935552
3
  size 7032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:193579b58931e077e6634f874afc9ecb8ab37d62e11f4161e06d8a4228126fcb
3
  size 7032