KaraKaraWitch committed on
Commit
2299fd2
1 Parent(s): 2a70f66
README.md CHANGED
@@ -23,47 +23,60 @@ model-index:
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
- value: 0.8571428571428571
27
  ---
28
 
29
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
- should probably proofread and complete it, then remove this comment. -->
31
 
32
  # Cowboy Hat emoji 🤠 (Western)
33
 
34
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.5372
37
- - Accuracy: 0.8571
38
 
39
  ## Model description
40
 
41
  When you want to know whether a piece of art is 🤠 or not 🤠.

43
  ## Intended uses & limitations
44
 
45
  Filter Gelbooru data into 🤠 and not 🤠.
46
 
47
  ## Training and evaluation data
48
 
49
- Selected 72 images of 🤠 and not 🤠.
50
 
51
  ## Training procedure
52
 
53
  ### Training hyperparameters
54
 
55
  The following hyperparameters were used during training:
56
- - learning_rate: 5e-05
57
  - train_batch_size: 8
58
  - eval_batch_size: 8
59
- - seed: 42
60
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
61
  - lr_scheduler_type: linear
62
- - num_epochs: 3.0
63
 
64
  ### Training results
65
 
66
- Works OK. Needs more fine-tuning.
67
 
68
  ### Framework versions
69
 
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.7777777777777778
27
  ---
28
 
 
 
29
 
30
  # Cowboy Hat emoji 🤠 (Western)
31
 
32
+ This model is a fine-tuned version of [facebook/convnextv2-large-22k-384](https://huggingface.co/facebook/convnextv2-large-22k-384) on the imagefolder dataset.
33
  It achieves the following results on the evaluation set:
34
+ - Loss: 0.4250
35
+ - Accuracy: 0.7778
36
 
37
  ## Model description
38
 
39
  When you want to know whether a piece of art is 🤠 or not 🤠.
40
 
41
+ - Current iteration: v3.5 (Continuous Image Integration)
42
+
43
+ ## Wait, why?
44
+
45
+ Gelbooru contains a lot of images, but not all of them come from the East/Southeast Asian region. To filter out such images, we created a classifier that, in theory, learns the differences between western (USA/Europe/etc.) and not western (Japan/China/SEA) art.
46
+
47
+ The definition of "Not Western" is limited to the Asian region (Japan, Korea, China, Taiwan, Thailand, and the surrounding region). The author believes the art from these regions is similar enough in "style", which he personally prefers over western art.
48
+
49
  ## Intended uses & limitations
50
 
51
  Filter Gelbooru data into 🤠 and not 🤠.
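A minimal inference sketch using the `transformers` image-classification pipeline; the repository id and image path below are placeholders, not values taken from this commit:

```python
from transformers import pipeline

# "KaraKaraWitch/western-classifier" is a placeholder repo id, not the real model name.
classifier = pipeline("image-classification", model="KaraKaraWitch/western-classifier")

# Scores for the two labels defined in config.json: "western" and "not_western".
results = classifier("example_gelbooru_image.jpg")
print(results)
# e.g. [{'label': 'not_western', 'score': 0.91}, {'label': 'western', 'score': 0.09}]
```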
52
 
53
  ## Training and evaluation data
54
 
55
+ Selected 358 images of 🤠 and not 🤠.
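The `imagefolder` builder infers class labels from sub-folder names. A hedged sketch of how such a dataset is typically loaded (the `data/` layout is an assumption, not taken from this commit):

```python
from datasets import load_dataset

# Assumed layout (one sub-folder per class, matching the labels in config.json):
#   data/western/*.png
#   data/not_western/*.png
dataset = load_dataset("imagefolder", data_dir="data")

# Folder names become the class labels used for fine-tuning.
print(dataset["train"].features["label"].names)  # ['not_western', 'western']
```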
56
 
57
  ## Training procedure
58
 
59
  ### Training hyperparameters
60
 
61
  The following hyperparameters were used during training:
62
+ - learning_rate: 2e-05
63
  - train_batch_size: 8
64
  - eval_batch_size: 8
65
+ - seed: 802565
66
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
67
  - lr_scheduler_type: linear
68
+ - num_epochs: 5.0
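For reference, a minimal `TrainingArguments` sketch matching the values listed above; the output directory and evaluation strategy are assumptions, and the Adam betas/epsilon are simply the `Trainer` defaults:

```python
from transformers import TrainingArguments

# output_dir and evaluation_strategy are assumptions; the rest mirrors the list above.
# Adam betas=(0.9, 0.999) and epsilon=1e-08 are the Trainer defaults, so no flags are needed.
training_args = TrainingArguments(
    output_dir="western-classifier",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    seed=802565,
    lr_scheduler_type="linear",
    num_train_epochs=5.0,
    evaluation_strategy="epoch",
)
```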
69
 
70
  ### Training results
71
 
72
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
73
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
74
+ | 0.7384 | 1.0 | 152 | 0.4268 | 0.7963 |
75
+ | 0.2888 | 2.0 | 304 | 0.4250 | 0.7778 |
76
+ | 0.2953 | 3.0 | 456 | 0.4250 | 0.7778 |
77
+ | 0.4914 | 4.0 | 608 | 0.4250 | 0.7778 |
78
+ | 0.4099 | 5.0 | 760 | 0.4250 | 0.7778 |
79
+
80
 
81
  ### Framework versions
82
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.8571428571428571,
4
- "eval_loss": 0.537242591381073,
5
- "eval_runtime": 1.4402,
6
- "eval_samples_per_second": 14.582,
7
- "eval_steps_per_second": 2.083,
8
- "train_loss": 0.5724380493164063,
9
- "train_runtime": 25.85,
10
- "train_samples_per_second": 13.81,
11
- "train_steps_per_second": 1.741
12
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.7777777777777778,
4
+ "eval_loss": 0.4249849319458008,
5
+ "eval_runtime": 5.6527,
6
+ "eval_samples_per_second": 9.553,
7
+ "eval_steps_per_second": 1.238,
8
+ "train_loss": 0.39741649894337905,
9
+ "train_runtime": 401.1562,
10
+ "train_samples_per_second": 3.789,
11
+ "train_steps_per_second": 1.895
12
  }
config.json CHANGED
@@ -1,33 +1,52 @@
1
  {
2
- "_name_or_path": "google/vit-base-patch16-224-in21k",
3
  "architectures": [
4
- "ViTForImageClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.0,
7
- "encoder_stride": 16,
8
  "finetuning_task": "image-classification",
9
  "hidden_act": "gelu",
10
- "hidden_dropout_prob": 0.0,
11
- "hidden_size": 768,
12
  "id2label": {
13
  "0": "not_western",
14
  "1": "western"
15
  },
16
  "image_size": 224,
17
  "initializer_range": 0.02,
18
- "intermediate_size": 3072,
19
  "label2id": {
20
  "not_western": "0",
21
  "western": "1"
22
  },
23
  "layer_norm_eps": 1e-12,
24
- "model_type": "vit",
25
- "num_attention_heads": 12,
26
  "num_channels": 3,
27
- "num_hidden_layers": 12,
28
- "patch_size": 16,
 
 
 
 
 
 
29
  "problem_type": "single_label_classification",
30
- "qkv_bias": true,
31
  "torch_dtype": "float32",
32
  "transformers_version": "4.30.0.dev0"
33
  }
 
1
  {
2
+ "_name_or_path": "facebook/convnextv2-large-22k-384",
3
  "architectures": [
4
+ "ConvNextV2ForImageClassification"
5
  ],
6
+ "depths": [
7
+ 3,
8
+ 3,
9
+ 27,
10
+ 3
11
+ ],
12
+ "drop_path_rate": 0.0,
13
  "finetuning_task": "image-classification",
14
  "hidden_act": "gelu",
15
+ "hidden_sizes": [
16
+ 192,
17
+ 384,
18
+ 768,
19
+ 1536
20
+ ],
21
  "id2label": {
22
  "0": "not_western",
23
  "1": "western"
24
  },
25
  "image_size": 224,
26
  "initializer_range": 0.02,
 
27
  "label2id": {
28
  "not_western": "0",
29
  "western": "1"
30
  },
31
  "layer_norm_eps": 1e-12,
32
+ "model_type": "convnextv2",
 
33
  "num_channels": 3,
34
+ "num_stages": 4,
35
+ "out_features": [
36
+ "stage4"
37
+ ],
38
+ "out_indices": [
39
+ 4
40
+ ],
41
+ "patch_size": 4,
42
  "problem_type": "single_label_classification",
43
+ "stage_names": [
44
+ "stem",
45
+ "stage1",
46
+ "stage2",
47
+ "stage3",
48
+ "stage4"
49
+ ],
50
  "torch_dtype": "float32",
51
  "transformers_version": "4.30.0.dev0"
52
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.8571428571428571,
4
- "eval_loss": 0.537242591381073,
5
- "eval_runtime": 1.4402,
6
- "eval_samples_per_second": 14.582,
7
- "eval_steps_per_second": 2.083
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.7777777777777778,
4
+ "eval_loss": 0.4249849319458008,
5
+ "eval_runtime": 5.6527,
6
+ "eval_samples_per_second": 9.553,
7
+ "eval_steps_per_second": 1.238
8
  }
preprocessor_config.json CHANGED
@@ -1,22 +1,22 @@
1
  {
 
2
  "do_normalize": true,
3
  "do_rescale": true,
4
  "do_resize": true,
5
  "image_mean": [
6
- 0.5,
7
- 0.5,
8
- 0.5
9
  ],
10
- "image_processor_type": "ViTImageProcessor",
11
  "image_std": [
12
- 0.5,
13
- 0.5,
14
- 0.5
15
  ],
16
- "resample": 2,
17
  "rescale_factor": 0.00392156862745098,
18
  "size": {
19
- "height": 224,
20
- "width": 224
21
  }
22
  }
 
1
  {
2
+ "crop_pct": 0.875,
3
  "do_normalize": true,
4
  "do_rescale": true,
5
  "do_resize": true,
6
  "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
  ],
11
+ "image_processor_type": "ConvNextImageProcessor",
12
  "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
  ],
17
+ "resample": 3,
18
  "rescale_factor": 0.00392156862745098,
19
  "size": {
20
+ "shortest_edge": 384
 
21
  }
22
  }
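The updated preprocessor config switches to ImageNet normalization statistics, bicubic resampling (`resample: 3`), and a 384-pixel shortest-edge resize. A hedged sketch of applying it through `AutoImageProcessor` (the repository id is a placeholder):

```python
from PIL import Image
from transformers import AutoImageProcessor

# Placeholder repo id; the processor settings would come from this repository's preprocessor_config.json.
processor = AutoImageProcessor.from_pretrained("KaraKaraWitch/western-classifier")

image = Image.open("example_gelbooru_image.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # e.g. torch.Size([1, 3, 384, 384])
```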
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f820f1f2576997b6a31a625789757927367756647cb988e7a143be3eb6411c5f
3
- size 343268717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba82cf2e625f4ef8e2d6128e772638a78a40848602ee5ed01861967aa0bc0a7
3
+ size 785824061
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 0.5724380493164063,
4
- "train_runtime": 25.85,
5
- "train_samples_per_second": 13.81,
6
- "train_steps_per_second": 1.741
7
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.39741649894337905,
4
+ "train_runtime": 401.1562,
5
+ "train_samples_per_second": 3.789,
6
+ "train_steps_per_second": 1.895
7
  }
trainer_state.json CHANGED
@@ -1,25 +1,526 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 45,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 3.0,
12
- "step": 45,
13
- "total_flos": 2.7664640292409344e+16,
14
- "train_loss": 0.5724380493164063,
15
- "train_runtime": 25.85,
16
- "train_samples_per_second": 13.81,
17
- "train_steps_per_second": 1.741
18
  }
19
  ],
20
- "max_steps": 45,
21
- "num_train_epochs": 3,
22
- "total_flos": 2.7664640292409344e+16,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 760,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 1.8842105263157898e-05,
13
+ "loss": 0.8301,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.13,
18
+ "learning_rate": 1.7789473684210527e-05,
19
+ "loss": 0.7976,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.2,
24
+ "learning_rate": 1.673684210526316e-05,
25
+ "loss": 0.5757,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.26,
30
+ "learning_rate": 1.568421052631579e-05,
31
+ "loss": 0.4569,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.33,
36
+ "learning_rate": 1.4631578947368424e-05,
37
+ "loss": 0.7308,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.39,
42
+ "learning_rate": 1.3578947368421055e-05,
43
+ "loss": 0.6149,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.46,
48
+ "learning_rate": 1.2526315789473684e-05,
49
+ "loss": 0.666,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.53,
54
+ "learning_rate": 1.1473684210526317e-05,
55
+ "loss": 0.6656,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.59,
60
+ "learning_rate": 1.0421052631578948e-05,
61
+ "loss": 0.5629,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.66,
66
+ "learning_rate": 9.36842105263158e-06,
67
+ "loss": 0.8803,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.72,
72
+ "learning_rate": 8.315789473684212e-06,
73
+ "loss": 0.4404,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.79,
78
+ "learning_rate": 7.263157894736843e-06,
79
+ "loss": 0.3989,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.86,
84
+ "learning_rate": 6.2105263157894745e-06,
85
+ "loss": 0.5877,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 0.92,
90
+ "learning_rate": 5.157894736842106e-06,
91
+ "loss": 0.5024,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.99,
96
+ "learning_rate": 4.105263157894737e-06,
97
+ "loss": 0.7384,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 1.0,
102
+ "eval_accuracy": 0.7962962962962963,
103
+ "eval_loss": 0.42684096097946167,
104
+ "eval_runtime": 5.5879,
105
+ "eval_samples_per_second": 9.664,
106
+ "eval_steps_per_second": 1.253,
107
+ "step": 152
108
+ },
109
+ {
110
+ "epoch": 1.05,
111
+ "learning_rate": 3.052631578947369e-06,
112
+ "loss": 0.4284,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 1.12,
117
+ "learning_rate": 2.0000000000000003e-06,
118
+ "loss": 0.1814,
119
+ "step": 170
120
+ },
121
+ {
122
+ "epoch": 1.18,
123
+ "learning_rate": 9.473684210526317e-07,
124
+ "loss": 0.2693,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 1.25,
129
+ "learning_rate": 0.0,
130
+ "loss": 0.3471,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 1.32,
135
+ "learning_rate": 0.0,
136
+ "loss": 0.3491,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 1.38,
141
+ "learning_rate": 0.0,
142
+ "loss": 0.2447,
143
+ "step": 210
144
+ },
145
+ {
146
+ "epoch": 1.45,
147
+ "learning_rate": 0.0,
148
+ "loss": 0.4893,
149
+ "step": 220
150
+ },
151
+ {
152
+ "epoch": 1.51,
153
+ "learning_rate": 0.0,
154
+ "loss": 0.2365,
155
+ "step": 230
156
+ },
157
+ {
158
+ "epoch": 1.58,
159
+ "learning_rate": 0.0,
160
+ "loss": 0.2919,
161
+ "step": 240
162
+ },
163
+ {
164
+ "epoch": 1.64,
165
+ "learning_rate": 0.0,
166
+ "loss": 0.4932,
167
+ "step": 250
168
+ },
169
+ {
170
+ "epoch": 1.71,
171
+ "learning_rate": 0.0,
172
+ "loss": 0.2127,
173
+ "step": 260
174
+ },
175
+ {
176
+ "epoch": 1.78,
177
+ "learning_rate": 0.0,
178
+ "loss": 0.2355,
179
+ "step": 270
180
+ },
181
+ {
182
+ "epoch": 1.84,
183
+ "learning_rate": 0.0,
184
+ "loss": 0.4058,
185
+ "step": 280
186
+ },
187
+ {
188
+ "epoch": 1.91,
189
+ "learning_rate": 0.0,
190
+ "loss": 0.4578,
191
+ "step": 290
192
+ },
193
+ {
194
+ "epoch": 1.97,
195
+ "learning_rate": 0.0,
196
+ "loss": 0.2888,
197
+ "step": 300
198
+ },
199
+ {
200
+ "epoch": 2.0,
201
+ "eval_accuracy": 0.7777777777777778,
202
+ "eval_loss": 0.4249849319458008,
203
+ "eval_runtime": 5.3688,
204
+ "eval_samples_per_second": 10.058,
205
+ "eval_steps_per_second": 1.304,
206
+ "step": 304
207
+ },
208
+ {
209
+ "epoch": 2.04,
210
+ "learning_rate": 0.0,
211
+ "loss": 0.5527,
212
+ "step": 310
213
+ },
214
+ {
215
+ "epoch": 2.11,
216
+ "learning_rate": 0.0,
217
+ "loss": 0.4907,
218
+ "step": 320
219
+ },
220
+ {
221
+ "epoch": 2.17,
222
+ "learning_rate": 0.0,
223
+ "loss": 0.6212,
224
+ "step": 330
225
+ },
226
+ {
227
+ "epoch": 2.24,
228
+ "learning_rate": 0.0,
229
+ "loss": 0.3302,
230
+ "step": 340
231
+ },
232
+ {
233
+ "epoch": 2.3,
234
+ "learning_rate": 0.0,
235
+ "loss": 0.4032,
236
+ "step": 350
237
+ },
238
+ {
239
+ "epoch": 2.37,
240
+ "learning_rate": 0.0,
241
+ "loss": 0.2037,
242
+ "step": 360
243
+ },
244
+ {
245
+ "epoch": 2.43,
246
+ "learning_rate": 0.0,
247
+ "loss": 0.2949,
248
+ "step": 370
249
+ },
250
+ {
251
+ "epoch": 2.5,
252
+ "learning_rate": 0.0,
253
+ "loss": 0.3068,
254
+ "step": 380
255
+ },
256
+ {
257
+ "epoch": 2.57,
258
+ "learning_rate": 0.0,
259
+ "loss": 0.2455,
260
+ "step": 390
261
+ },
262
+ {
263
+ "epoch": 2.63,
264
+ "learning_rate": 0.0,
265
+ "loss": 0.2118,
266
+ "step": 400
267
+ },
268
+ {
269
+ "epoch": 2.7,
270
+ "learning_rate": 0.0,
271
+ "loss": 0.5577,
272
+ "step": 410
273
+ },
274
+ {
275
+ "epoch": 2.76,
276
+ "learning_rate": 0.0,
277
+ "loss": 0.3432,
278
+ "step": 420
279
+ },
280
+ {
281
+ "epoch": 2.83,
282
+ "learning_rate": 0.0,
283
+ "loss": 0.2259,
284
+ "step": 430
285
+ },
286
+ {
287
+ "epoch": 2.89,
288
+ "learning_rate": 0.0,
289
+ "loss": 0.2736,
290
+ "step": 440
291
+ },
292
+ {
293
+ "epoch": 2.96,
294
+ "learning_rate": 0.0,
295
+ "loss": 0.2953,
296
+ "step": 450
297
+ },
298
  {
299
  "epoch": 3.0,
300
+ "eval_accuracy": 0.7777777777777778,
301
+ "eval_loss": 0.4249849319458008,
302
+ "eval_runtime": 5.4342,
303
+ "eval_samples_per_second": 9.937,
304
+ "eval_steps_per_second": 1.288,
305
+ "step": 456
306
+ },
307
+ {
308
+ "epoch": 3.03,
309
+ "learning_rate": 0.0,
310
+ "loss": 0.2847,
311
+ "step": 460
312
+ },
313
+ {
314
+ "epoch": 3.09,
315
+ "learning_rate": 0.0,
316
+ "loss": 0.2117,
317
+ "step": 470
318
+ },
319
+ {
320
+ "epoch": 3.16,
321
+ "learning_rate": 0.0,
322
+ "loss": 0.2116,
323
+ "step": 480
324
+ },
325
+ {
326
+ "epoch": 3.22,
327
+ "learning_rate": 0.0,
328
+ "loss": 0.3803,
329
+ "step": 490
330
+ },
331
+ {
332
+ "epoch": 3.29,
333
+ "learning_rate": 0.0,
334
+ "loss": 0.3206,
335
+ "step": 500
336
+ },
337
+ {
338
+ "epoch": 3.36,
339
+ "learning_rate": 0.0,
340
+ "loss": 0.4362,
341
+ "step": 510
342
+ },
343
+ {
344
+ "epoch": 3.42,
345
+ "learning_rate": 0.0,
346
+ "loss": 0.4031,
347
+ "step": 520
348
+ },
349
+ {
350
+ "epoch": 3.49,
351
+ "learning_rate": 0.0,
352
+ "loss": 0.2682,
353
+ "step": 530
354
+ },
355
+ {
356
+ "epoch": 3.55,
357
+ "learning_rate": 0.0,
358
+ "loss": 0.292,
359
+ "step": 540
360
+ },
361
+ {
362
+ "epoch": 3.62,
363
+ "learning_rate": 0.0,
364
+ "loss": 0.3007,
365
+ "step": 550
366
+ },
367
+ {
368
+ "epoch": 3.68,
369
+ "learning_rate": 0.0,
370
+ "loss": 0.5192,
371
+ "step": 560
372
+ },
373
+ {
374
+ "epoch": 3.75,
375
+ "learning_rate": 0.0,
376
+ "loss": 0.3809,
377
+ "step": 570
378
+ },
379
+ {
380
+ "epoch": 3.82,
381
+ "learning_rate": 0.0,
382
+ "loss": 0.3756,
383
+ "step": 580
384
+ },
385
+ {
386
+ "epoch": 3.88,
387
+ "learning_rate": 0.0,
388
+ "loss": 0.3174,
389
+ "step": 590
390
+ },
391
+ {
392
+ "epoch": 3.95,
393
+ "learning_rate": 0.0,
394
+ "loss": 0.4914,
395
+ "step": 600
396
+ },
397
+ {
398
+ "epoch": 4.0,
399
+ "eval_accuracy": 0.7777777777777778,
400
+ "eval_loss": 0.4249849319458008,
401
+ "eval_runtime": 5.3734,
402
+ "eval_samples_per_second": 10.05,
403
+ "eval_steps_per_second": 1.303,
404
+ "step": 608
405
+ },
406
+ {
407
+ "epoch": 4.01,
408
+ "learning_rate": 0.0,
409
+ "loss": 0.2904,
410
+ "step": 610
411
+ },
412
+ {
413
+ "epoch": 4.08,
414
+ "learning_rate": 0.0,
415
+ "loss": 0.433,
416
+ "step": 620
417
+ },
418
+ {
419
+ "epoch": 4.14,
420
+ "learning_rate": 0.0,
421
+ "loss": 0.1684,
422
+ "step": 630
423
+ },
424
+ {
425
+ "epoch": 4.21,
426
+ "learning_rate": 0.0,
427
+ "loss": 0.2903,
428
+ "step": 640
429
+ },
430
+ {
431
+ "epoch": 4.28,
432
+ "learning_rate": 0.0,
433
+ "loss": 0.3659,
434
+ "step": 650
435
+ },
436
+ {
437
+ "epoch": 4.34,
438
+ "learning_rate": 0.0,
439
+ "loss": 0.2125,
440
+ "step": 660
441
+ },
442
+ {
443
+ "epoch": 4.41,
444
+ "learning_rate": 0.0,
445
+ "loss": 0.3639,
446
+ "step": 670
447
+ },
448
+ {
449
+ "epoch": 4.47,
450
+ "learning_rate": 0.0,
451
+ "loss": 0.4092,
452
+ "step": 680
453
+ },
454
+ {
455
+ "epoch": 4.54,
456
+ "learning_rate": 0.0,
457
+ "loss": 0.3467,
458
+ "step": 690
459
+ },
460
+ {
461
+ "epoch": 4.61,
462
+ "learning_rate": 0.0,
463
+ "loss": 0.3013,
464
+ "step": 700
465
+ },
466
+ {
467
+ "epoch": 4.67,
468
+ "learning_rate": 0.0,
469
+ "loss": 0.2546,
470
+ "step": 710
471
+ },
472
+ {
473
+ "epoch": 4.74,
474
+ "learning_rate": 0.0,
475
+ "loss": 0.3741,
476
+ "step": 720
477
+ },
478
+ {
479
+ "epoch": 4.8,
480
+ "learning_rate": 0.0,
481
+ "loss": 0.394,
482
+ "step": 730
483
+ },
484
+ {
485
+ "epoch": 4.87,
486
+ "learning_rate": 0.0,
487
+ "loss": 0.2283,
488
+ "step": 740
489
+ },
490
+ {
491
+ "epoch": 4.93,
492
+ "learning_rate": 0.0,
493
+ "loss": 0.4307,
494
+ "step": 750
495
+ },
496
+ {
497
+ "epoch": 5.0,
498
+ "learning_rate": 0.0,
499
+ "loss": 0.4099,
500
+ "step": 760
501
+ },
502
+ {
503
+ "epoch": 5.0,
504
+ "eval_accuracy": 0.7777777777777778,
505
+ "eval_loss": 0.4249849319458008,
506
+ "eval_runtime": 5.8483,
507
+ "eval_samples_per_second": 9.233,
508
+ "eval_steps_per_second": 1.197,
509
+ "step": 760
510
+ },
511
+ {
512
+ "epoch": 5.0,
513
+ "step": 760,
514
+ "total_flos": 7.945332390564987e+17,
515
+ "train_loss": 0.39741649894337905,
516
+ "train_runtime": 401.1562,
517
+ "train_samples_per_second": 3.789,
518
+ "train_steps_per_second": 1.895
519
  }
520
  ],
521
+ "max_steps": 760,
522
+ "num_train_epochs": 5,
523
+ "total_flos": 7.945332390564987e+17,
524
  "trial_name": null,
525
  "trial_params": null
526
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17eb5b0411fa90a36e2a5ef3742fb9c9089bfe617f2b308d17876d66bfad9ba6
3
- size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd3da6756d0e2aff4054a6a80b1de56d02c3602827fa3ba6063bd32a09111ae0
3
+ size 3835