C0uchP0tat0
committed on
Commit
•
86be24b
1
Parent(s):
59613ee
End of training
Browse files
README.md
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: ai-forever/rugpt3large_based_on_gpt2
|
3 |
+
tags:
|
4 |
+
- generated_from_trainer
|
5 |
+
model-index:
|
6 |
+
- name: laws_rugpt3medium_finetune
|
7 |
+
results: []
|
8 |
+
---
|
9 |
+
|
10 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
11 |
+
should probably proofread and complete it, then remove this comment. -->
|
12 |
+
|
13 |
+
# laws_rugpt3medium_finetune
|
14 |
+
|
15 |
+
This model is a fine-tuned version of [ai-forever/rugpt3large_based_on_gpt2](https://huggingface.co/ai-forever/rugpt3large_based_on_gpt2) on an unknown dataset.
|
16 |
+
It achieves the following results on the evaluation set:
|
17 |
+
- Loss: 0.4051
|
18 |
+
|
19 |
+
## Model description
|
20 |
+
|
21 |
+
More information needed
|
22 |
+
|
23 |
+
## Intended uses & limitations
|
24 |
+
|
25 |
+
More information needed
|
26 |
+
|
27 |
+
## Training and evaluation data
|
28 |
+
|
29 |
+
More information needed
|
30 |
+
|
31 |
+
## Training procedure
|
32 |
+
|
33 |
+
### Training hyperparameters
|
34 |
+
|
35 |
+
The following hyperparameters were used during training:
|
36 |
+
- learning_rate: 1e-05
|
37 |
+
- train_batch_size: 4
|
38 |
+
- eval_batch_size: 4
|
39 |
+
- seed: 42
|
40 |
+
- gradient_accumulation_steps: 3
|
41 |
+
- total_train_batch_size: 12
|
42 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
43 |
+
- lr_scheduler_type: cosine
|
44 |
+
- lr_scheduler_warmup_steps: 1000
|
45 |
+
- num_epochs: 30
|
46 |
+
- mixed_precision_training: Native AMP
|
47 |
+
|
48 |
+
### Training results
|
49 |
+
|
50 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
51 |
+
|:-------------:|:-----:|:----:|:---------------:|
|
52 |
+
| 3.3772 | 0.23 | 25 | 3.3796 |
|
53 |
+
| 3.4598 | 0.46 | 50 | 3.3744 |
|
54 |
+
| 3.3981 | 0.69 | 75 | 3.3587 |
|
55 |
+
| 3.4916 | 0.93 | 100 | 3.3322 |
|
56 |
+
| 3.4166 | 1.16 | 125 | 3.2980 |
|
57 |
+
| 3.3829 | 1.39 | 150 | 3.2626 |
|
58 |
+
| 3.2992 | 1.62 | 175 | 3.2285 |
|
59 |
+
| 3.3237 | 1.85 | 200 | 3.1936 |
|
60 |
+
| 3.2106 | 2.08 | 225 | 3.1601 |
|
61 |
+
| 3.1947 | 2.31 | 250 | 3.1311 |
|
62 |
+
| 3.2183 | 2.55 | 275 | 3.0988 |
|
63 |
+
| 3.2124 | 2.78 | 300 | 3.0620 |
|
64 |
+
| 3.1725 | 3.01 | 325 | 3.0266 |
|
65 |
+
| 3.078 | 3.24 | 350 | 2.9931 |
|
66 |
+
| 3.0387 | 3.47 | 375 | 2.9595 |
|
67 |
+
| 3.0944 | 3.7 | 400 | 2.9194 |
|
68 |
+
| 3.049 | 3.94 | 425 | 2.8818 |
|
69 |
+
| 2.9818 | 4.17 | 450 | 2.8438 |
|
70 |
+
| 2.9278 | 4.4 | 475 | 2.8074 |
|
71 |
+
| 2.9172 | 4.63 | 500 | 2.7671 |
|
72 |
+
| 2.8432 | 4.86 | 525 | 2.7233 |
|
73 |
+
| 2.8499 | 5.09 | 550 | 2.6794 |
|
74 |
+
| 2.76 | 5.32 | 575 | 2.6310 |
|
75 |
+
| 2.7197 | 5.56 | 600 | 2.5857 |
|
76 |
+
| 2.793 | 5.79 | 625 | 2.5458 |
|
77 |
+
| 2.6895 | 6.02 | 650 | 2.4991 |
|
78 |
+
| 2.651 | 6.25 | 675 | 2.4496 |
|
79 |
+
| 2.5484 | 6.48 | 700 | 2.4014 |
|
80 |
+
| 2.5728 | 6.71 | 725 | 2.3471 |
|
81 |
+
| 2.4865 | 6.94 | 750 | 2.2953 |
|
82 |
+
| 2.4388 | 7.18 | 775 | 2.2369 |
|
83 |
+
| 2.4137 | 7.41 | 800 | 2.1799 |
|
84 |
+
| 2.3262 | 7.64 | 825 | 2.1285 |
|
85 |
+
| 2.3043 | 7.87 | 850 | 2.0836 |
|
86 |
+
| 2.2541 | 8.1 | 875 | 2.0299 |
|
87 |
+
| 2.1348 | 8.33 | 900 | 1.9730 |
|
88 |
+
| 2.1904 | 8.56 | 925 | 1.9211 |
|
89 |
+
| 2.0869 | 8.8 | 950 | 1.8719 |
|
90 |
+
| 2.1606 | 9.03 | 975 | 1.8210 |
|
91 |
+
| 1.9323 | 9.26 | 1000 | 1.7712 |
|
92 |
+
| 1.9892 | 9.49 | 1025 | 1.7254 |
|
93 |
+
| 1.9407 | 9.72 | 1050 | 1.6757 |
|
94 |
+
| 1.8791 | 9.95 | 1075 | 1.6214 |
|
95 |
+
| 1.7791 | 10.19 | 1100 | 1.5702 |
|
96 |
+
| 1.7523 | 10.42 | 1125 | 1.5284 |
|
97 |
+
| 1.7336 | 10.65 | 1150 | 1.4912 |
|
98 |
+
| 1.7709 | 10.88 | 1175 | 1.4475 |
|
99 |
+
| 1.6533 | 11.11 | 1200 | 1.3941 |
|
100 |
+
| 1.5671 | 11.34 | 1225 | 1.3536 |
|
101 |
+
| 1.5394 | 11.57 | 1250 | 1.3209 |
|
102 |
+
| 1.6085 | 11.81 | 1275 | 1.2921 |
|
103 |
+
| 1.5465 | 12.04 | 1300 | 1.2599 |
|
104 |
+
| 1.4172 | 12.27 | 1325 | 1.2292 |
|
105 |
+
| 1.4422 | 12.5 | 1350 | 1.1927 |
|
106 |
+
| 1.4708 | 12.73 | 1375 | 1.1563 |
|
107 |
+
| 1.3859 | 12.96 | 1400 | 1.1260 |
|
108 |
+
| 1.2036 | 13.19 | 1425 | 1.0932 |
|
109 |
+
| 1.3393 | 13.43 | 1450 | 1.0697 |
|
110 |
+
| 1.3203 | 13.66 | 1475 | 1.0376 |
|
111 |
+
| 1.2902 | 13.89 | 1500 | 1.0084 |
|
112 |
+
| 1.2356 | 14.12 | 1525 | 0.9760 |
|
113 |
+
| 1.2329 | 14.35 | 1550 | 0.9531 |
|
114 |
+
| 1.2039 | 14.58 | 1575 | 0.9343 |
|
115 |
+
| 1.1521 | 14.81 | 1600 | 0.9084 |
|
116 |
+
| 1.0754 | 15.05 | 1625 | 0.8786 |
|
117 |
+
| 1.0786 | 15.28 | 1650 | 0.8620 |
|
118 |
+
| 1.1052 | 15.51 | 1675 | 0.8395 |
|
119 |
+
| 1.0765 | 15.74 | 1700 | 0.8192 |
|
120 |
+
| 1.0817 | 15.97 | 1725 | 0.8002 |
|
121 |
+
| 1.0285 | 16.2 | 1750 | 0.7715 |
|
122 |
+
| 1.0313 | 16.44 | 1775 | 0.7612 |
|
123 |
+
| 0.9682 | 16.67 | 1800 | 0.7458 |
|
124 |
+
| 1.0025 | 16.9 | 1825 | 0.7267 |
|
125 |
+
| 0.9516 | 17.13 | 1850 | 0.7052 |
|
126 |
+
| 0.9475 | 17.36 | 1875 | 0.6952 |
|
127 |
+
| 0.8851 | 17.59 | 1900 | 0.6745 |
|
128 |
+
| 0.9463 | 17.82 | 1925 | 0.6602 |
|
129 |
+
| 0.8937 | 18.06 | 1950 | 0.6436 |
|
130 |
+
| 0.8135 | 18.29 | 1975 | 0.6316 |
|
131 |
+
| 0.8738 | 18.52 | 2000 | 0.6172 |
|
132 |
+
| 0.8585 | 18.75 | 2025 | 0.6072 |
|
133 |
+
| 0.8782 | 18.98 | 2050 | 0.5968 |
|
134 |
+
| 0.8324 | 19.21 | 2075 | 0.5789 |
|
135 |
+
| 0.7818 | 19.44 | 2100 | 0.5688 |
|
136 |
+
| 0.8375 | 19.68 | 2125 | 0.5602 |
|
137 |
+
| 0.7838 | 19.91 | 2150 | 0.5498 |
|
138 |
+
| 0.8015 | 20.14 | 2175 | 0.5369 |
|
139 |
+
| 0.724 | 20.37 | 2200 | 0.5299 |
|
140 |
+
| 0.7298 | 20.6 | 2225 | 0.5233 |
|
141 |
+
| 0.8079 | 20.83 | 2250 | 0.5141 |
|
142 |
+
| 0.77 | 21.06 | 2275 | 0.5058 |
|
143 |
+
| 0.7299 | 21.3 | 2300 | 0.4995 |
|
144 |
+
| 0.7152 | 21.53 | 2325 | 0.4893 |
|
145 |
+
| 0.6905 | 21.76 | 2350 | 0.4882 |
|
146 |
+
| 0.7492 | 21.99 | 2375 | 0.4779 |
|
147 |
+
| 0.6817 | 22.22 | 2400 | 0.4681 |
|
148 |
+
| 0.6893 | 22.45 | 2425 | 0.4652 |
|
149 |
+
| 0.7098 | 22.69 | 2450 | 0.4611 |
|
150 |
+
| 0.7063 | 22.92 | 2475 | 0.4582 |
|
151 |
+
| 0.6562 | 23.15 | 2500 | 0.4511 |
|
152 |
+
| 0.7083 | 23.38 | 2525 | 0.4474 |
|
153 |
+
| 0.6684 | 23.61 | 2550 | 0.4438 |
|
154 |
+
| 0.6688 | 23.84 | 2575 | 0.4398 |
|
155 |
+
| 0.6561 | 24.07 | 2600 | 0.4334 |
|
156 |
+
| 0.6664 | 24.31 | 2625 | 0.4318 |
|
157 |
+
| 0.6418 | 24.54 | 2650 | 0.4294 |
|
158 |
+
| 0.6723 | 24.77 | 2675 | 0.4249 |
|
159 |
+
| 0.6164 | 25.0 | 2700 | 0.4215 |
|
160 |
+
| 0.6348 | 25.23 | 2725 | 0.4203 |
|
161 |
+
| 0.6464 | 25.46 | 2750 | 0.4182 |
|
162 |
+
| 0.6392 | 25.69 | 2775 | 0.4171 |
|
163 |
+
| 0.6186 | 25.93 | 2800 | 0.4156 |
|
164 |
+
| 0.6447 | 26.16 | 2825 | 0.4138 |
|
165 |
+
| 0.6445 | 26.39 | 2850 | 0.4114 |
|
166 |
+
| 0.6037 | 26.62 | 2875 | 0.4109 |
|
167 |
+
| 0.6074 | 26.85 | 2900 | 0.4099 |
|
168 |
+
| 0.6509 | 27.08 | 2925 | 0.4092 |
|
169 |
+
| 0.6416 | 27.31 | 2950 | 0.4082 |
|
170 |
+
| 0.6391 | 27.55 | 2975 | 0.4075 |
|
171 |
+
| 0.594 | 27.78 | 3000 | 0.4071 |
|
172 |
+
| 0.6231 | 28.01 | 3025 | 0.4066 |
|
173 |
+
| 0.6151 | 28.24 | 3050 | 0.4061 |
|
174 |
+
| 0.6464 | 28.47 | 3075 | 0.4056 |
|
175 |
+
| 0.6024 | 28.7 | 3100 | 0.4054 |
|
176 |
+
| 0.6277 | 28.94 | 3125 | 0.4052 |
|
177 |
+
| 0.6017 | 29.17 | 3150 | 0.4052 |
|
178 |
+
| 0.6226 | 29.4 | 3175 | 0.4051 |
|
179 |
+
| 0.6084 | 29.63 | 3200 | 0.4051 |
|
180 |
+
| 0.639 | 29.86 | 3225 | 0.4051 |
|
181 |
+
|
182 |
+
|
183 |
+
### Framework versions
|
184 |
+
|
185 |
+
- Transformers 4.35.2
|
186 |
+
- PyTorch 2.1.0+cu121
|
187 |
+
- Datasets 2.16.0
|
188 |
+
- Tokenizers 0.15.0
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.35.2"
|
7 |
+
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3041230624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d6021da3c1bb650999a93ef3b0ecfd5eb8f060f8305c275ac8441a63ddcca85
|
3 |
size 3041230624
|
runs/Dec29_10-36-37_f584ed272cfa/events.out.tfevents.1703846199.f584ed272cfa.2912.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8344574c455e7f44551fa76c05429a8c05f9fc637aa1bccc3d10892aa954923e
|
3 |
+
size 59968
|