kurosekurose committed on
Commit
f3bd17e
1 Parent(s): 2337b29

End of training

Browse files
Files changed (5) hide show
  1. README.md +27 -25
  2. config.json +3 -1
  3. model.safetensors +3 -0
  4. trainer_state.json +178 -146
  5. training_args.bin +2 -2
README.md CHANGED
@@ -1,5 +1,6 @@
1
  ---
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
  metrics:
@@ -14,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # wav2vec2-base-EMOPIA
16
 
17
- This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.4966
20
- - Accuracy: 0.6429
21
 
22
  ## Model description
23
 
@@ -37,12 +38,13 @@ More information needed
37
 
38
  The following hyperparameters were used during training:
39
  - learning_rate: 1e-05
40
- - train_batch_size: 4
41
- - eval_batch_size: 4
42
  - seed: 42
 
 
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
- - lr_scheduler_warmup_steps: 500
46
  - num_epochs: 15
47
  - mixed_precision_training: Native AMP
48
 
@@ -50,26 +52,26 @@ The following hyperparameters were used during training:
50
 
51
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
- | 1.3315 | 1.0 | 175 | 1.1984 | 0.3857 |
54
- | 1.0391 | 2.0 | 350 | 1.0071 | 0.5429 |
55
- | 0.8973 | 3.0 | 525 | 0.9441 | 0.5857 |
56
- | 0.8199 | 4.0 | 700 | 0.9984 | 0.5286 |
57
- | 0.7675 | 5.0 | 875 | 0.9477 | 0.6286 |
58
- | 0.6446 | 6.0 | 1050 | 1.0956 | 0.6429 |
59
- | 0.6221 | 7.0 | 1225 | 1.0144 | 0.6286 |
60
- | 0.5543 | 8.0 | 1400 | 1.2473 | 0.6286 |
61
- | 0.5095 | 9.0 | 1575 | 1.2690 | 0.6714 |
62
- | 0.4461 | 10.0 | 1750 | 1.1365 | 0.7286 |
63
- | 0.3678 | 11.0 | 1925 | 1.2065 | 0.7000 |
64
- | 0.3184 | 12.0 | 2100 | 1.6990 | 0.6286 |
65
- | 0.271 | 13.0 | 2275 | 1.5335 | 0.6286 |
66
- | 0.234 | 14.0 | 2450 | 1.3234 | 0.7143 |
67
- | 0.2049 | 15.0 | 2625 | 1.4966 | 0.6429 |
68
 
69
 
70
  ### Framework versions
71
 
72
- - Transformers 4.18.0
73
- - Pytorch 1.10.1+cu102
74
- - Datasets 2.4.0
75
- - Tokenizers 0.12.1
 
1
  ---
2
  license: apache-2.0
3
+ base_model: facebook/wav2vec2-base
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # wav2vec2-base-EMOPIA
17
 
18
+ This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 2.1691
21
+ - Accuracy: 0.6338
22
 
23
  ## Model description
24
 
 
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 1e-05
41
+ - train_batch_size: 1
42
+ - eval_batch_size: 1
43
  - seed: 42
44
+ - gradient_accumulation_steps: 3
45
+ - total_train_batch_size: 3
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
 
48
  - num_epochs: 15
49
  - mixed_precision_training: Native AMP
50
 
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
54
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
55
+ | 0.8716 | 1.0 | 269 | 0.9822 | 0.6197 |
56
+ | 0.8143 | 2.0 | 538 | 1.2324 | 0.5352 |
57
+ | 0.7584 | 3.0 | 807 | 1.0226 | 0.6479 |
58
+ | 0.6715 | 4.0 | 1076 | 0.9550 | 0.6620 |
59
+ | 0.6471 | 5.0 | 1345 | 1.1272 | 0.6761 |
60
+ | 0.5759 | 6.0 | 1614 | 1.2193 | 0.6761 |
61
+ | 0.4963 | 7.0 | 1883 | 1.2214 | 0.7183 |
62
+ | 0.4053 | 8.0 | 2152 | 1.3083 | 0.7465 |
63
+ | 0.3344 | 9.0 | 2421 | 1.6391 | 0.6620 |
64
+ | 0.3216 | 10.0 | 2690 | 1.7224 | 0.6479 |
65
+ | 0.2248 | 11.0 | 2959 | 1.7973 | 0.6761 |
66
+ | 0.1982 | 12.0 | 3228 | 2.0241 | 0.6479 |
67
+ | 0.1362 | 13.0 | 3497 | 1.9933 | 0.6479 |
68
+ | 0.0879 | 14.0 | 3766 | 2.0865 | 0.6479 |
69
+ | 0.0712 | 15.0 | 4035 | 2.1691 | 0.6338 |
70
 
71
 
72
  ### Framework versions
73
 
74
+ - Transformers 4.42.2
75
+ - Pytorch 2.3.1+cu118
76
+ - Datasets 2.20.0
77
+ - Tokenizers 0.19.1
config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "_name_or_path": "facebook/wav2vec2-base",
3
  "activation_dropout": 0.0,
 
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
6
  "add_adapter": false,
@@ -53,6 +54,7 @@
53
  "final_dropout": 0.0,
54
  "finetuning_task": "facebook/wav2vec2-base",
55
  "freeze_feat_extract_train": true,
 
56
  "hidden_act": "gelu",
57
  "hidden_dropout": 0.1,
58
  "hidden_size": 768,
@@ -125,7 +127,7 @@
125
  1
126
  ],
127
  "torch_dtype": "float32",
128
- "transformers_version": "4.18.0",
129
  "use_weighted_layer_sum": false,
130
  "vocab_size": 32,
131
  "xvector_output_dim": 512
 
1
  {
2
  "_name_or_path": "facebook/wav2vec2-base",
3
  "activation_dropout": 0.0,
4
+ "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
6
  "adapter_stride": 2,
7
  "add_adapter": false,
 
54
  "final_dropout": 0.0,
55
  "finetuning_task": "facebook/wav2vec2-base",
56
  "freeze_feat_extract_train": true,
57
+ "gradient_checkpointing": false,
58
  "hidden_act": "gelu",
59
  "hidden_dropout": 0.1,
60
  "hidden_size": 768,
 
127
  1
128
  ],
129
  "torch_dtype": "float32",
130
+ "transformers_version": "4.42.2",
131
  "use_weighted_layer_sum": false,
132
  "vocab_size": 32,
133
  "xvector_output_dim": 512
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07a19119aa6c8a53c8089247222f6c8e7cdffbc0f38560d051fa0c6e69e54868
3
+ size 379887584
trainer_state.json CHANGED
@@ -1,250 +1,282 @@
1
  {
2
- "best_metric": 0.9441239833831787,
3
- "best_model_checkpoint": "kurosekurose/wav2vec2-base-EMOPIA/checkpoint-525",
4
  "epoch": 15.0,
5
- "global_step": 2625,
 
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 3.5e-06,
13
- "loss": 1.3315,
14
- "step": 175
 
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.3857142925262451,
19
- "eval_loss": 1.1984184980392456,
20
- "eval_runtime": 48.1732,
21
- "eval_samples_per_second": 1.453,
22
- "eval_steps_per_second": 0.374,
23
- "step": 175
24
  },
25
  {
26
  "epoch": 2.0,
27
- "learning_rate": 7e-06,
28
- "loss": 1.0391,
29
- "step": 350
 
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_accuracy": 0.5428571701049805,
34
- "eval_loss": 1.0071275234222412,
35
- "eval_runtime": 48.3665,
36
- "eval_samples_per_second": 1.447,
37
- "eval_steps_per_second": 0.372,
38
- "step": 350
39
  },
40
  {
41
  "epoch": 3.0,
42
- "learning_rate": 9.887058823529413e-06,
43
- "loss": 0.8973,
44
- "step": 525
 
45
  },
46
  {
47
  "epoch": 3.0,
48
- "eval_accuracy": 0.5857142806053162,
49
- "eval_loss": 0.9441239833831787,
50
- "eval_runtime": 48.2354,
51
- "eval_samples_per_second": 1.451,
52
- "eval_steps_per_second": 0.373,
53
- "step": 525
54
  },
55
  {
56
  "epoch": 4.0,
57
- "learning_rate": 9.063529411764706e-06,
58
- "loss": 0.8199,
59
- "step": 700
 
60
  },
61
  {
62
  "epoch": 4.0,
63
- "eval_accuracy": 0.5285714268684387,
64
- "eval_loss": 0.9983876347541809,
65
- "eval_runtime": 48.536,
66
- "eval_samples_per_second": 1.442,
67
- "eval_steps_per_second": 0.371,
68
- "step": 700
69
  },
70
  {
71
  "epoch": 5.0,
72
- "learning_rate": 8.244705882352942e-06,
73
- "loss": 0.7675,
74
- "step": 875
 
75
  },
76
  {
77
  "epoch": 5.0,
78
- "eval_accuracy": 0.6285714507102966,
79
- "eval_loss": 0.9477005004882812,
80
- "eval_runtime": 48.23,
81
- "eval_samples_per_second": 1.451,
82
- "eval_steps_per_second": 0.373,
83
- "step": 875
84
  },
85
  {
86
  "epoch": 6.0,
87
- "learning_rate": 7.421176470588236e-06,
88
- "loss": 0.6446,
89
- "step": 1050
 
90
  },
91
  {
92
  "epoch": 6.0,
93
- "eval_accuracy": 0.6428571343421936,
94
- "eval_loss": 1.0956463813781738,
95
- "eval_runtime": 48.136,
96
- "eval_samples_per_second": 1.454,
97
- "eval_steps_per_second": 0.374,
98
- "step": 1050
99
  },
100
  {
101
  "epoch": 7.0,
102
- "learning_rate": 6.59764705882353e-06,
103
- "loss": 0.6221,
104
- "step": 1225
 
105
  },
106
  {
107
  "epoch": 7.0,
108
- "eval_accuracy": 0.6285714507102966,
109
- "eval_loss": 1.0143612623214722,
110
- "eval_runtime": 48.3709,
111
- "eval_samples_per_second": 1.447,
112
- "eval_steps_per_second": 0.372,
113
- "step": 1225
114
  },
115
  {
116
  "epoch": 8.0,
117
- "learning_rate": 5.778823529411765e-06,
118
- "loss": 0.5543,
119
- "step": 1400
 
120
  },
121
  {
122
  "epoch": 8.0,
123
- "eval_accuracy": 0.6285714507102966,
124
- "eval_loss": 1.24734365940094,
125
- "eval_runtime": 48.144,
126
- "eval_samples_per_second": 1.454,
127
- "eval_steps_per_second": 0.374,
128
- "step": 1400
129
  },
130
  {
131
  "epoch": 9.0,
132
- "learning_rate": 4.955294117647059e-06,
133
- "loss": 0.5095,
134
- "step": 1575
 
135
  },
136
  {
137
  "epoch": 9.0,
138
- "eval_accuracy": 0.6714285612106323,
139
- "eval_loss": 1.2690364122390747,
140
- "eval_runtime": 48.163,
141
- "eval_samples_per_second": 1.453,
142
- "eval_steps_per_second": 0.374,
143
- "step": 1575
144
  },
145
  {
146
  "epoch": 10.0,
147
- "learning_rate": 4.1317647058823535e-06,
148
- "loss": 0.4461,
149
- "step": 1750
 
150
  },
151
  {
152
  "epoch": 10.0,
153
- "eval_accuracy": 0.7285714149475098,
154
- "eval_loss": 1.1364508867263794,
155
- "eval_runtime": 48.2055,
156
- "eval_samples_per_second": 1.452,
157
- "eval_steps_per_second": 0.373,
158
- "step": 1750
159
  },
160
  {
161
  "epoch": 11.0,
162
- "learning_rate": 3.3129411764705886e-06,
163
- "loss": 0.3678,
164
- "step": 1925
 
165
  },
166
  {
167
  "epoch": 11.0,
168
- "eval_accuracy": 0.699999988079071,
169
- "eval_loss": 1.2065324783325195,
170
- "eval_runtime": 48.5129,
171
- "eval_samples_per_second": 1.443,
172
- "eval_steps_per_second": 0.371,
173
- "step": 1925
174
  },
175
  {
176
  "epoch": 12.0,
177
- "learning_rate": 2.4894117647058827e-06,
178
- "loss": 0.3184,
179
- "step": 2100
 
180
  },
181
  {
182
  "epoch": 12.0,
183
- "eval_accuracy": 0.6285714507102966,
184
- "eval_loss": 1.699022889137268,
185
- "eval_runtime": 48.0723,
186
- "eval_samples_per_second": 1.456,
187
- "eval_steps_per_second": 0.374,
188
- "step": 2100
189
  },
190
  {
191
  "epoch": 13.0,
192
- "learning_rate": 1.6658823529411766e-06,
193
- "loss": 0.271,
194
- "step": 2275
 
195
  },
196
  {
197
  "epoch": 13.0,
198
- "eval_accuracy": 0.6285714507102966,
199
- "eval_loss": 1.5335015058517456,
200
- "eval_runtime": 48.3723,
201
- "eval_samples_per_second": 1.447,
202
- "eval_steps_per_second": 0.372,
203
- "step": 2275
204
  },
205
  {
206
  "epoch": 14.0,
207
- "learning_rate": 8.423529411764707e-07,
208
- "loss": 0.234,
209
- "step": 2450
 
210
  },
211
  {
212
  "epoch": 14.0,
213
- "eval_accuracy": 0.7142857313156128,
214
- "eval_loss": 1.3233639001846313,
215
- "eval_runtime": 47.9369,
216
- "eval_samples_per_second": 1.46,
217
- "eval_steps_per_second": 0.375,
218
- "step": 2450
219
  },
220
  {
221
  "epoch": 15.0,
222
- "learning_rate": 1.8823529411764708e-08,
223
- "loss": 0.2049,
224
- "step": 2625
 
225
  },
226
  {
227
  "epoch": 15.0,
228
- "eval_accuracy": 0.6428571343421936,
229
- "eval_loss": 1.4966269731521606,
230
- "eval_runtime": 48.2524,
231
- "eval_samples_per_second": 1.451,
232
- "eval_steps_per_second": 0.373,
233
- "step": 2625
234
  },
235
  {
236
  "epoch": 15.0,
237
- "step": 2625,
238
- "total_flos": 4.992891844826972e+18,
239
- "train_loss": 0.6018750116257441,
240
- "train_runtime": 20482.8582,
241
- "train_samples_per_second": 0.513,
242
- "train_steps_per_second": 0.128
243
  }
244
  ],
245
- "max_steps": 2625,
 
 
246
  "num_train_epochs": 15,
247
- "total_flos": 4.992891844826972e+18,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  "trial_name": null,
249
  "trial_params": null
250
  }
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
  "epoch": 15.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4035,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 13.118809700012207,
14
+ "learning_rate": 9.335811648079307e-06,
15
+ "loss": 0.8716,
16
+ "step": 269
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.6197183132171631,
21
+ "eval_loss": 0.9822000861167908,
22
+ "eval_runtime": 32.2509,
23
+ "eval_samples_per_second": 2.201,
24
+ "eval_steps_per_second": 2.201,
25
+ "step": 269
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 10.717401504516602,
30
+ "learning_rate": 8.66914498141264e-06,
31
+ "loss": 0.8143,
32
+ "step": 538
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.5352112650871277,
37
+ "eval_loss": 1.232447624206543,
38
+ "eval_runtime": 32.3951,
39
+ "eval_samples_per_second": 2.192,
40
+ "eval_steps_per_second": 2.192,
41
+ "step": 538
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "grad_norm": 23.701391220092773,
46
+ "learning_rate": 8.004956629491945e-06,
47
+ "loss": 0.7584,
48
+ "step": 807
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.6478873491287231,
53
+ "eval_loss": 1.0226496458053589,
54
+ "eval_runtime": 32.3,
55
+ "eval_samples_per_second": 2.198,
56
+ "eval_steps_per_second": 2.198,
57
+ "step": 807
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 33.54118728637695,
62
+ "learning_rate": 7.340768277571252e-06,
63
+ "loss": 0.6715,
64
+ "step": 1076
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.6619718074798584,
69
+ "eval_loss": 0.9550462961196899,
70
+ "eval_runtime": 32.4715,
71
+ "eval_samples_per_second": 2.187,
72
+ "eval_steps_per_second": 2.187,
73
+ "step": 1076
74
  },
75
  {
76
  "epoch": 5.0,
77
+ "grad_norm": 30.0217227935791,
78
+ "learning_rate": 6.674101610904585e-06,
79
+ "loss": 0.6471,
80
+ "step": 1345
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_accuracy": 0.6760563254356384,
85
+ "eval_loss": 1.1272403001785278,
86
+ "eval_runtime": 32.2183,
87
+ "eval_samples_per_second": 2.204,
88
+ "eval_steps_per_second": 2.204,
89
+ "step": 1345
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "grad_norm": 0.36277323961257935,
94
+ "learning_rate": 6.009913258983892e-06,
95
+ "loss": 0.5759,
96
+ "step": 1614
97
  },
98
  {
99
  "epoch": 6.0,
100
+ "eval_accuracy": 0.6760563254356384,
101
+ "eval_loss": 1.2193043231964111,
102
+ "eval_runtime": 32.3011,
103
+ "eval_samples_per_second": 2.198,
104
+ "eval_steps_per_second": 2.198,
105
+ "step": 1614
106
  },
107
  {
108
  "epoch": 7.0,
109
+ "grad_norm": 17.32307243347168,
110
+ "learning_rate": 5.343246592317225e-06,
111
+ "loss": 0.4963,
112
+ "step": 1883
113
  },
114
  {
115
  "epoch": 7.0,
116
+ "eval_accuracy": 0.7183098793029785,
117
+ "eval_loss": 1.221394658088684,
118
+ "eval_runtime": 32.1815,
119
+ "eval_samples_per_second": 2.206,
120
+ "eval_steps_per_second": 2.206,
121
+ "step": 1883
122
  },
123
  {
124
  "epoch": 8.0,
125
+ "grad_norm": 0.09576527029275894,
126
+ "learning_rate": 4.679058240396531e-06,
127
+ "loss": 0.4053,
128
+ "step": 2152
129
  },
130
  {
131
  "epoch": 8.0,
132
+ "eval_accuracy": 0.7464788556098938,
133
+ "eval_loss": 1.3082976341247559,
134
+ "eval_runtime": 32.7134,
135
+ "eval_samples_per_second": 2.17,
136
+ "eval_steps_per_second": 2.17,
137
+ "step": 2152
138
  },
139
  {
140
  "epoch": 9.0,
141
+ "grad_norm": 73.52884674072266,
142
+ "learning_rate": 4.012391573729864e-06,
143
+ "loss": 0.3344,
144
+ "step": 2421
145
  },
146
  {
147
  "epoch": 9.0,
148
+ "eval_accuracy": 0.6619718074798584,
149
+ "eval_loss": 1.6390645503997803,
150
+ "eval_runtime": 32.8911,
151
+ "eval_samples_per_second": 2.159,
152
+ "eval_steps_per_second": 2.159,
153
+ "step": 2421
154
  },
155
  {
156
  "epoch": 10.0,
157
+ "grad_norm": 242.48890686035156,
158
+ "learning_rate": 3.34820322180917e-06,
159
+ "loss": 0.3216,
160
+ "step": 2690
161
  },
162
  {
163
  "epoch": 10.0,
164
+ "eval_accuracy": 0.6478873491287231,
165
+ "eval_loss": 1.722383737564087,
166
+ "eval_runtime": 32.4813,
167
+ "eval_samples_per_second": 2.186,
168
+ "eval_steps_per_second": 2.186,
169
+ "step": 2690
170
  },
171
  {
172
  "epoch": 11.0,
173
+ "grad_norm": 376.0904541015625,
174
+ "learning_rate": 2.6815365551425034e-06,
175
+ "loss": 0.2248,
176
+ "step": 2959
177
  },
178
  {
179
  "epoch": 11.0,
180
+ "eval_accuracy": 0.6760563254356384,
181
+ "eval_loss": 1.7972948551177979,
182
+ "eval_runtime": 32.2962,
183
+ "eval_samples_per_second": 2.198,
184
+ "eval_steps_per_second": 2.198,
185
+ "step": 2959
186
  },
187
  {
188
  "epoch": 12.0,
189
+ "grad_norm": 38.31782913208008,
190
+ "learning_rate": 2.0148698884758364e-06,
191
+ "loss": 0.1982,
192
+ "step": 3228
193
  },
194
  {
195
  "epoch": 12.0,
196
+ "eval_accuracy": 0.6478873491287231,
197
+ "eval_loss": 2.02411150932312,
198
+ "eval_runtime": 32.4081,
199
+ "eval_samples_per_second": 2.191,
200
+ "eval_steps_per_second": 2.191,
201
+ "step": 3228
202
  },
203
  {
204
  "epoch": 13.0,
205
+ "grad_norm": 0.5905938744544983,
206
+ "learning_rate": 1.3482032218091697e-06,
207
+ "loss": 0.1362,
208
+ "step": 3497
209
  },
210
  {
211
  "epoch": 13.0,
212
+ "eval_accuracy": 0.6478873491287231,
213
+ "eval_loss": 1.9932571649551392,
214
+ "eval_runtime": 32.4096,
215
+ "eval_samples_per_second": 2.191,
216
+ "eval_steps_per_second": 2.191,
217
+ "step": 3497
218
  },
219
  {
220
  "epoch": 14.0,
221
+ "grad_norm": 2.5972626209259033,
222
+ "learning_rate": 6.815365551425032e-07,
223
+ "loss": 0.0879,
224
+ "step": 3766
225
  },
226
  {
227
  "epoch": 14.0,
228
+ "eval_accuracy": 0.6478873491287231,
229
+ "eval_loss": 2.086475372314453,
230
+ "eval_runtime": 32.8946,
231
+ "eval_samples_per_second": 2.158,
232
+ "eval_steps_per_second": 2.158,
233
+ "step": 3766
234
  },
235
  {
236
  "epoch": 15.0,
237
+ "grad_norm": 0.036518827080726624,
238
+ "learning_rate": 1.4869888475836432e-08,
239
+ "loss": 0.0712,
240
+ "step": 4035
241
  },
242
  {
243
  "epoch": 15.0,
244
+ "eval_accuracy": 0.6338028311729431,
245
+ "eval_loss": 2.1690945625305176,
246
+ "eval_runtime": 32.3643,
247
+ "eval_samples_per_second": 2.194,
248
+ "eval_steps_per_second": 2.194,
249
+ "step": 4035
250
  },
251
  {
252
  "epoch": 15.0,
253
+ "step": 4035,
254
+ "total_flos": 7.1272809219168e+18,
255
+ "train_loss": 0.44098070326643213,
256
+ "train_runtime": 9434.6926,
257
+ "train_samples_per_second": 1.283,
258
+ "train_steps_per_second": 0.428
259
  }
260
  ],
261
+ "logging_steps": 8,
262
+ "max_steps": 4035,
263
+ "num_input_tokens_seen": 0,
264
  "num_train_epochs": 15,
265
+ "save_steps": 500,
266
+ "stateful_callbacks": {
267
+ "TrainerControl": {
268
+ "args": {
269
+ "should_epoch_stop": false,
270
+ "should_evaluate": false,
271
+ "should_log": false,
272
+ "should_save": false,
273
+ "should_training_stop": false
274
+ },
275
+ "attributes": {}
276
+ }
277
+ },
278
+ "total_flos": 7.1272809219168e+18,
279
+ "train_batch_size": 1,
280
  "trial_name": null,
281
  "trial_params": null
282
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64952a100eb670e38f7986031bb278f14e1f6d77290a34a2990eb65a138078b4
3
- size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a32a793e09c20ba8ccdd195ed10a1762d2e2711caef8e57cb8228ccdadcf50
3
+ size 5176