csikasote committed on
Commit
9ebe00c
1 Parent(s): 0225c75

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -3
  2. all_results.json +15 -0
  3. eval_results.json +9 -0
  4. train_results.json +9 -0
  5. trainer_state.json +364 -0
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
3
  license: mit
4
  base_model: facebook/w2v-bert-2.0
5
  tags:
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # w2v-bert-bem-bl
18
 
19
- This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2403
22
- - Wer: 0.4353
23
 
24
  ## Model description
25
 
 
3
  license: mit
4
  base_model: facebook/w2v-bert-2.0
5
  tags:
6
+ - automatic-speech-recognition
7
+ - BembaSpeech
8
  - generated_from_trainer
9
  metrics:
10
  - wer
 
18
 
19
  # w2v-bert-bem-bl
20
 
21
+ This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the BEMBASPEECH - BEM dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.2136
24
+ - Wer: 0.4539
25
 
26
  ## Model description
27
 
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.99297259311314,
3
+ "eval_loss": 0.21356959640979767,
4
+ "eval_runtime": 55.8886,
5
+ "eval_samples": 1492,
6
+ "eval_samples_per_second": 26.696,
7
+ "eval_steps_per_second": 3.346,
8
+ "eval_wer": 0.45393200103815207,
9
+ "total_flos": 2.094794810533582e+19,
10
+ "train_loss": 0.25149581422282674,
11
+ "train_runtime": 9293.6781,
12
+ "train_samples": 11377,
13
+ "train_samples_per_second": 12.242,
14
+ "train_steps_per_second": 0.765
15
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.99297259311314,
3
+ "eval_loss": 0.21356959640979767,
4
+ "eval_runtime": 55.8886,
5
+ "eval_samples": 1492,
6
+ "eval_samples_per_second": 26.696,
7
+ "eval_steps_per_second": 3.346,
8
+ "eval_wer": 0.45393200103815207
9
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.99297259311314,
3
+ "total_flos": 2.094794810533582e+19,
4
+ "train_loss": 0.25149581422282674,
5
+ "train_runtime": 9293.6781,
6
+ "train_samples": 11377,
7
+ "train_samples_per_second": 12.242,
8
+ "train_steps_per_second": 0.765
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.21356959640979767,
3
+ "best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-bl/checkpoint-5500",
4
+ "epoch": 9.99297259311314,
5
+ "eval_steps": 500,
6
+ "global_step": 7110,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.35137034434293746,
13
+ "grad_norm": 6.841489791870117,
14
+ "learning_rate": 0.00014879999999999998,
15
+ "loss": 1.1986,
16
+ "step": 250
17
+ },
18
+ {
19
+ "epoch": 0.7027406886858749,
20
+ "grad_norm": 5.7157793045043945,
21
+ "learning_rate": 0.0002988,
22
+ "loss": 0.5344,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 0.7027406886858749,
27
+ "eval_loss": 0.5448043942451477,
28
+ "eval_runtime": 58.822,
29
+ "eval_samples_per_second": 25.365,
30
+ "eval_steps_per_second": 3.179,
31
+ "eval_wer": 0.7378665974565274,
32
+ "step": 500
33
+ },
34
+ {
35
+ "epoch": 1.0541110330288124,
36
+ "grad_norm": 1.5800367593765259,
37
+ "learning_rate": 0.00028874432677760965,
38
+ "loss": 0.4918,
39
+ "step": 750
40
+ },
41
+ {
42
+ "epoch": 1.4054813773717498,
43
+ "grad_norm": 1.6567301750183105,
44
+ "learning_rate": 0.0002774432677760968,
45
+ "loss": 0.4242,
46
+ "step": 1000
47
+ },
48
+ {
49
+ "epoch": 1.4054813773717498,
50
+ "eval_loss": 0.3054925501346588,
51
+ "eval_runtime": 56.7685,
52
+ "eval_samples_per_second": 26.282,
53
+ "eval_steps_per_second": 3.294,
54
+ "eval_wer": 0.6024742624794532,
55
+ "step": 1000
56
+ },
57
+ {
58
+ "epoch": 1.7568517217146873,
59
+ "grad_norm": 1.094335675239563,
60
+ "learning_rate": 0.0002660968229954614,
61
+ "loss": 0.3958,
62
+ "step": 1250
63
+ },
64
+ {
65
+ "epoch": 2.1082220660576247,
66
+ "grad_norm": 1.0848636627197266,
67
+ "learning_rate": 0.000254750378214826,
68
+ "loss": 0.3603,
69
+ "step": 1500
70
+ },
71
+ {
72
+ "epoch": 2.1082220660576247,
73
+ "eval_loss": 0.26925167441368103,
74
+ "eval_runtime": 56.9762,
75
+ "eval_samples_per_second": 26.186,
76
+ "eval_steps_per_second": 3.282,
77
+ "eval_wer": 0.5385413963145601,
78
+ "step": 1500
79
+ },
80
+ {
81
+ "epoch": 2.459592410400562,
82
+ "grad_norm": 0.9221932888031006,
83
+ "learning_rate": 0.00024340393343419062,
84
+ "loss": 0.3228,
85
+ "step": 1750
86
+ },
87
+ {
88
+ "epoch": 2.8109627547434997,
89
+ "grad_norm": 1.12798273563385,
90
+ "learning_rate": 0.0002320574886535552,
91
+ "loss": 0.3144,
92
+ "step": 2000
93
+ },
94
+ {
95
+ "epoch": 2.8109627547434997,
96
+ "eval_loss": 0.2683100700378418,
97
+ "eval_runtime": 56.9393,
98
+ "eval_samples_per_second": 26.203,
99
+ "eval_steps_per_second": 3.284,
100
+ "eval_wer": 0.5529025002162817,
101
+ "step": 2000
102
+ },
103
+ {
104
+ "epoch": 3.162333099086437,
105
+ "grad_norm": 0.6111273169517517,
106
+ "learning_rate": 0.0002207110438729198,
107
+ "loss": 0.2931,
108
+ "step": 2250
109
+ },
110
+ {
111
+ "epoch": 3.5137034434293746,
112
+ "grad_norm": 0.5683824419975281,
113
+ "learning_rate": 0.0002093645990922844,
114
+ "loss": 0.2656,
115
+ "step": 2500
116
+ },
117
+ {
118
+ "epoch": 3.5137034434293746,
119
+ "eval_loss": 0.24722729623317719,
120
+ "eval_runtime": 57.2449,
121
+ "eval_samples_per_second": 26.063,
122
+ "eval_steps_per_second": 3.267,
123
+ "eval_wer": 0.5258240332208669,
124
+ "step": 2500
125
+ },
126
+ {
127
+ "epoch": 3.865073787772312,
128
+ "grad_norm": 0.7263462543487549,
129
+ "learning_rate": 0.000198018154311649,
130
+ "loss": 0.2574,
131
+ "step": 2750
132
+ },
133
+ {
134
+ "epoch": 4.2164441321152495,
135
+ "grad_norm": 0.422783762216568,
136
+ "learning_rate": 0.0001866717095310136,
137
+ "loss": 0.2311,
138
+ "step": 3000
139
+ },
140
+ {
141
+ "epoch": 4.2164441321152495,
142
+ "eval_loss": 0.2352277785539627,
143
+ "eval_runtime": 56.81,
144
+ "eval_samples_per_second": 26.263,
145
+ "eval_steps_per_second": 3.292,
146
+ "eval_wer": 0.5025521238861493,
147
+ "step": 3000
148
+ },
149
+ {
150
+ "epoch": 4.567814476458187,
151
+ "grad_norm": 0.5190473794937134,
152
+ "learning_rate": 0.00017532526475037822,
153
+ "loss": 0.2133,
154
+ "step": 3250
155
+ },
156
+ {
157
+ "epoch": 4.919184820801124,
158
+ "grad_norm": 0.7750300765037537,
159
+ "learning_rate": 0.0001639788199697428,
160
+ "loss": 0.2106,
161
+ "step": 3500
162
+ },
163
+ {
164
+ "epoch": 4.919184820801124,
165
+ "eval_loss": 0.2326769232749939,
166
+ "eval_runtime": 56.8929,
167
+ "eval_samples_per_second": 26.225,
168
+ "eval_steps_per_second": 3.287,
169
+ "eval_wer": 0.5003027943593736,
170
+ "step": 3500
171
+ },
172
+ {
173
+ "epoch": 5.270555165144062,
174
+ "grad_norm": 0.37189096212387085,
175
+ "learning_rate": 0.0001526323751891074,
176
+ "loss": 0.1835,
177
+ "step": 3750
178
+ },
179
+ {
180
+ "epoch": 5.621925509486999,
181
+ "grad_norm": 0.5811314582824707,
182
+ "learning_rate": 0.000141285930408472,
183
+ "loss": 0.1816,
184
+ "step": 4000
185
+ },
186
+ {
187
+ "epoch": 5.621925509486999,
188
+ "eval_loss": 0.22979679703712463,
189
+ "eval_runtime": 56.9781,
190
+ "eval_samples_per_second": 26.185,
191
+ "eval_steps_per_second": 3.282,
192
+ "eval_wer": 0.49865905355134527,
193
+ "step": 4000
194
+ },
195
+ {
196
+ "epoch": 5.973295853829937,
197
+ "grad_norm": 1.2501654624938965,
198
+ "learning_rate": 0.0001299394856278366,
199
+ "loss": 0.1804,
200
+ "step": 4250
201
+ },
202
+ {
203
+ "epoch": 6.324666198172874,
204
+ "grad_norm": 0.40958958864212036,
205
+ "learning_rate": 0.0001185930408472012,
206
+ "loss": 0.1432,
207
+ "step": 4500
208
+ },
209
+ {
210
+ "epoch": 6.324666198172874,
211
+ "eval_loss": 0.21775686740875244,
212
+ "eval_runtime": 57.4498,
213
+ "eval_samples_per_second": 25.97,
214
+ "eval_steps_per_second": 3.255,
215
+ "eval_wer": 0.46855264296219395,
216
+ "step": 4500
217
+ },
218
+ {
219
+ "epoch": 6.676036542515812,
220
+ "grad_norm": 0.5354466438293457,
221
+ "learning_rate": 0.0001072465960665658,
222
+ "loss": 0.1475,
223
+ "step": 4750
224
+ },
225
+ {
226
+ "epoch": 7.027406886858749,
227
+ "grad_norm": 0.44123438000679016,
228
+ "learning_rate": 9.59001512859304e-05,
229
+ "loss": 0.1431,
230
+ "step": 5000
231
+ },
232
+ {
233
+ "epoch": 7.027406886858749,
234
+ "eval_loss": 0.21715782582759857,
235
+ "eval_runtime": 57.0515,
236
+ "eval_samples_per_second": 26.152,
237
+ "eval_steps_per_second": 3.278,
238
+ "eval_wer": 0.47469504282377367,
239
+ "step": 5000
240
+ },
241
+ {
242
+ "epoch": 7.378777231201687,
243
+ "grad_norm": 0.29582667350769043,
244
+ "learning_rate": 8.4553706505295e-05,
245
+ "loss": 0.1111,
246
+ "step": 5250
247
+ },
248
+ {
249
+ "epoch": 7.730147575544624,
250
+ "grad_norm": 0.5549105405807495,
251
+ "learning_rate": 7.32072617246596e-05,
252
+ "loss": 0.1069,
253
+ "step": 5500
254
+ },
255
+ {
256
+ "epoch": 7.730147575544624,
257
+ "eval_loss": 0.21356959640979767,
258
+ "eval_runtime": 56.8619,
259
+ "eval_samples_per_second": 26.239,
260
+ "eval_steps_per_second": 3.289,
261
+ "eval_wer": 0.45393200103815207,
262
+ "step": 5500
263
+ },
264
+ {
265
+ "epoch": 8.081517919887562,
266
+ "grad_norm": 0.632247269153595,
267
+ "learning_rate": 6.18608169440242e-05,
268
+ "loss": 0.1045,
269
+ "step": 5750
270
+ },
271
+ {
272
+ "epoch": 8.432888264230499,
273
+ "grad_norm": 0.3406793475151062,
274
+ "learning_rate": 5.05143721633888e-05,
275
+ "loss": 0.0767,
276
+ "step": 6000
277
+ },
278
+ {
279
+ "epoch": 8.432888264230499,
280
+ "eval_loss": 0.22697694599628448,
281
+ "eval_runtime": 56.8774,
282
+ "eval_samples_per_second": 26.232,
283
+ "eval_steps_per_second": 3.288,
284
+ "eval_wer": 0.4403495112033913,
285
+ "step": 6000
286
+ },
287
+ {
288
+ "epoch": 8.784258608573436,
289
+ "grad_norm": 0.4026772379875183,
290
+ "learning_rate": 3.91679273827534e-05,
291
+ "loss": 0.0781,
292
+ "step": 6250
293
+ },
294
+ {
295
+ "epoch": 9.135628952916374,
296
+ "grad_norm": 0.4836815595626831,
297
+ "learning_rate": 2.7821482602118e-05,
298
+ "loss": 0.0667,
299
+ "step": 6500
300
+ },
301
+ {
302
+ "epoch": 9.135628952916374,
303
+ "eval_loss": 0.23749949038028717,
304
+ "eval_runtime": 56.671,
305
+ "eval_samples_per_second": 26.327,
306
+ "eval_steps_per_second": 3.3,
307
+ "eval_wer": 0.4385327450471494,
308
+ "step": 6500
309
+ },
310
+ {
311
+ "epoch": 9.486999297259311,
312
+ "grad_norm": 0.17178182303905487,
313
+ "learning_rate": 1.64750378214826e-05,
314
+ "loss": 0.0502,
315
+ "step": 6750
316
+ },
317
+ {
318
+ "epoch": 9.838369641602249,
319
+ "grad_norm": 0.3071761131286621,
320
+ "learning_rate": 5.128593040847201e-06,
321
+ "loss": 0.0468,
322
+ "step": 7000
323
+ },
324
+ {
325
+ "epoch": 9.838369641602249,
326
+ "eval_loss": 0.2403486669063568,
327
+ "eval_runtime": 56.6826,
328
+ "eval_samples_per_second": 26.322,
329
+ "eval_steps_per_second": 3.299,
330
+ "eval_wer": 0.4353317761051994,
331
+ "step": 7000
332
+ },
333
+ {
334
+ "epoch": 9.99297259311314,
335
+ "step": 7110,
336
+ "total_flos": 2.094794810533582e+19,
337
+ "train_loss": 0.25149581422282674,
338
+ "train_runtime": 9293.6781,
339
+ "train_samples_per_second": 12.242,
340
+ "train_steps_per_second": 0.765
341
+ }
342
+ ],
343
+ "logging_steps": 250,
344
+ "max_steps": 7110,
345
+ "num_input_tokens_seen": 0,
346
+ "num_train_epochs": 10,
347
+ "save_steps": 500,
348
+ "stateful_callbacks": {
349
+ "TrainerControl": {
350
+ "args": {
351
+ "should_epoch_stop": false,
352
+ "should_evaluate": false,
353
+ "should_log": false,
354
+ "should_save": true,
355
+ "should_training_stop": true
356
+ },
357
+ "attributes": {}
358
+ }
359
+ },
360
+ "total_flos": 2.094794810533582e+19,
361
+ "train_batch_size": 8,
362
+ "trial_name": null,
363
+ "trial_params": null
364
+ }