ihanif committed
Commit 555ac92
1 Parent(s): 7deaa3e

add best WER checkpoint at 3000 steps

checkpoint-3000/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+ "_name_or_path": "openai/whisper-small",
+ "activation_dropout": 0.0,
+ "activation_function": "gelu",
+ "architectures": [
+ "WhisperForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "begin_suppress_tokens": [
+ 220,
+ 50257
+ ],
+ "bos_token_id": 50257,
+ "d_model": 768,
+ "decoder_attention_heads": 12,
+ "decoder_ffn_dim": 3072,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 50258,
+ "dropout": 0.0,
+ "encoder_attention_heads": 12,
+ "encoder_ffn_dim": 3072,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 50257,
+ "forced_decoder_ids": null,
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "max_length": 448,
+ "max_source_positions": 1500,
+ "max_target_positions": 448,
+ "model_type": "whisper",
+ "num_hidden_layers": 12,
+ "num_mel_bins": 80,
+ "pad_token_id": 50257,
+ "scale_embedding": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.26.0.dev0",
+ "use_cache": false,
+ "vocab_size": 51865
+ }
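The config above pins the base model (openai/whisper-small) and the WhisperForConditionalGeneration architecture. As a minimal sketch, assuming the checkpoint directory has been pulled locally and that a transformers version at or above the 4.26 development release noted above is installed, the checkpoint can be loaded as below; the local path is illustrative.

# Minimal sketch: load the fine-tuned model from this checkpoint directory.
# The path is an assumption; point it at wherever the checkpoint lives locally.
from transformers import WhisperForConditionalGeneration, WhisperProcessor

checkpoint_dir = "./checkpoint-3000"  # hypothetical local path
model = WhisperForConditionalGeneration.from_pretrained(checkpoint_dir)
# Tokenizer files are not part of this commit, so the processor is taken
# from the base model named in "_name_or_path".
processor = WhisperProcessor.from_pretrained("openai/whisper-small")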
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69c6311664a33bc54b6c97e08f1e9c9235fda8590af92fe3a3ab687f57cf6778
+ size 1934161093
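The *.pt and *.bin entries in this commit are Git LFS pointer stubs rather than the binaries themselves; each records only the spec version, the sha256 oid, and the byte size of the real file. A minimal sketch (standard library only, filename illustrative) for reading one of these pointers:

def read_lfs_pointer(path):
    # Each line of a pointer file is "<key> <value>", e.g. "size 1934161093".
    fields = {}
    with open(path) as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

info = read_lfs_pointer("checkpoint-3000/optimizer.pt")  # hypothetical local path
print(info["oid"], info["size"])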
checkpoint-3000/preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-3000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5764b46edba29c43965ebbb92f48c0f108fbbbc77cfaa224dcfb807e88c5e55a
+ size 967102601
checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40cb3deb4838cfec86261ac1bfe3ea3a3f050eb56b5930a641925eef640c40e4
+ size 14575
checkpoint-3000/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:67052f7ccf0c314ec66f8a5e6561069877b7b24a3a43dcf29f3baded513f272d
+ size 557
checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ef06d0cdaf008a8194b49ac5e048244aa436a656f010611ec32a4be618c2ef4
+ size 627
checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,934 @@
+ {
+ "best_metric": 230.27391041162227,
+ "best_model_checkpoint": "./checkpoint-100",
+ "epoch": 428.57142857142856,
+ "global_step": 3000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 3.57,
+ "learning_rate": 1.32e-07,
+ "loss": 2.8512,
+ "step": 25
+ },
+ {
+ "epoch": 7.14,
+ "learning_rate": 2.8199999999999996e-07,
+ "loss": 2.7082,
+ "step": 50
+ },
+ {
+ "epoch": 10.71,
+ "learning_rate": 2.9305263157894735e-07,
+ "loss": 2.3515,
+ "step": 75
+ },
+ {
+ "epoch": 14.29,
+ "learning_rate": 2.851578947368421e-07,
+ "loss": 2.0871,
+ "step": 100
+ },
+ {
+ "epoch": 14.29,
+ "eval_loss": 2.0101583003997803,
+ "eval_runtime": 720.7636,
+ "eval_samples_per_second": 0.71,
+ "eval_steps_per_second": 0.044,
+ "eval_wer": 230.27391041162227,
+ "step": 100
+ },
+ {
+ "epoch": 17.86,
+ "learning_rate": 2.7726315789473684e-07,
+ "loss": 1.8622,
+ "step": 125
+ },
+ {
+ "epoch": 21.43,
+ "learning_rate": 2.693684210526316e-07,
+ "loss": 1.7104,
+ "step": 150
+ },
+ {
+ "epoch": 25.0,
+ "learning_rate": 2.614736842105263e-07,
+ "loss": 1.5736,
+ "step": 175
+ },
+ {
+ "epoch": 28.57,
+ "learning_rate": 2.53578947368421e-07,
+ "loss": 1.465,
+ "step": 200
+ },
+ {
+ "epoch": 28.57,
+ "eval_loss": 1.4968725442886353,
+ "eval_runtime": 628.5425,
+ "eval_samples_per_second": 0.815,
+ "eval_steps_per_second": 0.051,
+ "eval_wer": 137.24273607748182,
+ "step": 200
+ },
+ {
+ "epoch": 32.14,
+ "learning_rate": 2.4568421052631577e-07,
+ "loss": 1.3669,
+ "step": 225
+ },
+ {
+ "epoch": 35.71,
+ "learning_rate": 2.3778947368421054e-07,
+ "loss": 1.2898,
+ "step": 250
+ },
+ {
+ "epoch": 39.29,
+ "learning_rate": 2.2989473684210523e-07,
+ "loss": 1.2205,
+ "step": 275
+ },
+ {
+ "epoch": 42.86,
+ "learning_rate": 2.2199999999999998e-07,
+ "loss": 1.1617,
+ "step": 300
+ },
+ {
+ "epoch": 42.86,
+ "eval_loss": 1.2715740203857422,
+ "eval_runtime": 499.3378,
+ "eval_samples_per_second": 1.025,
+ "eval_steps_per_second": 0.064,
+ "eval_wer": 76.32415254237289,
+ "step": 300
+ },
+ {
+ "epoch": 46.43,
+ "learning_rate": 2.1410526315789472e-07,
+ "loss": 1.1091,
+ "step": 325
+ },
+ {
+ "epoch": 50.0,
+ "learning_rate": 2.0621052631578947e-07,
+ "loss": 1.0738,
+ "step": 350
+ },
+ {
+ "epoch": 53.57,
+ "learning_rate": 1.9831578947368419e-07,
+ "loss": 1.033,
+ "step": 375
+ },
+ {
+ "epoch": 57.14,
+ "learning_rate": 1.9042105263157893e-07,
+ "loss": 1.0019,
+ "step": 400
+ },
+ {
+ "epoch": 57.14,
+ "eval_loss": 1.16450834274292,
+ "eval_runtime": 480.3701,
+ "eval_samples_per_second": 1.066,
+ "eval_steps_per_second": 0.067,
+ "eval_wer": 71.37560532687651,
+ "step": 400
+ },
+ {
+ "epoch": 60.71,
+ "learning_rate": 1.8252631578947368e-07,
+ "loss": 0.9712,
+ "step": 425
+ },
+ {
+ "epoch": 64.29,
+ "learning_rate": 1.7463157894736842e-07,
+ "loss": 0.9437,
+ "step": 450
+ },
+ {
+ "epoch": 67.86,
+ "learning_rate": 1.6673684210526314e-07,
+ "loss": 0.9215,
+ "step": 475
+ },
+ {
+ "epoch": 71.43,
+ "learning_rate": 1.588421052631579e-07,
+ "loss": 0.9052,
+ "step": 500
+ },
+ {
+ "epoch": 71.43,
+ "eval_loss": 1.1051170825958252,
+ "eval_runtime": 486.3374,
+ "eval_samples_per_second": 1.053,
+ "eval_steps_per_second": 0.066,
+ "eval_wer": 69.78662227602905,
+ "step": 500
+ },
+ {
+ "epoch": 75.0,
+ "learning_rate": 1.5094736842105263e-07,
+ "loss": 0.8773,
+ "step": 525
+ },
+ {
+ "epoch": 78.57,
+ "learning_rate": 1.4305263157894735e-07,
+ "loss": 0.8643,
+ "step": 550
+ },
+ {
+ "epoch": 82.14,
+ "learning_rate": 1.351578947368421e-07,
+ "loss": 0.8449,
+ "step": 575
+ },
+ {
+ "epoch": 85.71,
+ "learning_rate": 1.2726315789473684e-07,
+ "loss": 0.8334,
+ "step": 600
+ },
+ {
+ "epoch": 85.71,
+ "eval_loss": 1.0691100358963013,
+ "eval_runtime": 478.3463,
+ "eval_samples_per_second": 1.07,
+ "eval_steps_per_second": 0.067,
+ "eval_wer": 68.26573849878935,
+ "step": 600
+ },
+ {
+ "epoch": 89.29,
+ "learning_rate": 1.1936842105263156e-07,
+ "loss": 0.8132,
+ "step": 625
+ },
+ {
+ "epoch": 92.86,
+ "learning_rate": 1.1147368421052631e-07,
+ "loss": 0.8058,
+ "step": 650
+ },
+ {
+ "epoch": 96.43,
+ "learning_rate": 1.0357894736842104e-07,
+ "loss": 0.7913,
+ "step": 675
+ },
+ {
+ "epoch": 100.0,
+ "learning_rate": 9.568421052631579e-08,
+ "loss": 0.7838,
+ "step": 700
+ },
+ {
+ "epoch": 100.0,
+ "eval_loss": 1.0482958555221558,
+ "eval_runtime": 478.3861,
+ "eval_samples_per_second": 1.07,
+ "eval_steps_per_second": 0.067,
+ "eval_wer": 67.16858353510897,
+ "step": 700
+ },
+ {
+ "epoch": 103.57,
+ "learning_rate": 8.778947368421052e-08,
+ "loss": 0.7768,
+ "step": 725
+ },
+ {
+ "epoch": 107.14,
+ "learning_rate": 7.989473684210526e-08,
+ "loss": 0.7673,
+ "step": 750
+ },
+ {
+ "epoch": 110.71,
+ "learning_rate": 7.2e-08,
+ "loss": 0.7643,
+ "step": 775
+ },
+ {
+ "epoch": 114.29,
+ "learning_rate": 6.410526315789473e-08,
+ "loss": 0.7539,
+ "step": 800
+ },
+ {
+ "epoch": 114.29,
+ "eval_loss": 1.0362622737884521,
+ "eval_runtime": 484.8883,
+ "eval_samples_per_second": 1.056,
+ "eval_steps_per_second": 0.066,
+ "eval_wer": 66.41949152542372,
+ "step": 800
+ },
+ {
+ "epoch": 117.86,
+ "learning_rate": 5.621052631578947e-08,
+ "loss": 0.7527,
+ "step": 825
+ },
+ {
+ "epoch": 121.43,
+ "learning_rate": 4.8315789473684206e-08,
+ "loss": 0.7441,
+ "step": 850
+ },
+ {
+ "epoch": 125.0,
+ "learning_rate": 4.0421052631578945e-08,
+ "loss": 0.7417,
+ "step": 875
+ },
+ {
+ "epoch": 128.57,
+ "learning_rate": 3.2526315789473684e-08,
+ "loss": 0.7377,
+ "step": 900
+ },
+ {
+ "epoch": 128.57,
+ "eval_loss": 1.0297424793243408,
+ "eval_runtime": 471.9816,
+ "eval_samples_per_second": 1.085,
+ "eval_steps_per_second": 0.068,
+ "eval_wer": 66.20006053268766,
+ "step": 900
+ },
+ {
+ "epoch": 132.14,
+ "learning_rate": 2.463157894736842e-08,
+ "loss": 0.7387,
+ "step": 925
+ },
+ {
+ "epoch": 135.71,
+ "learning_rate": 1.673684210526316e-08,
+ "loss": 0.7329,
+ "step": 950
+ },
+ {
+ "epoch": 139.29,
+ "learning_rate": 8.842105263157895e-09,
+ "loss": 0.7312,
+ "step": 975
+ },
+ {
+ "epoch": 142.86,
+ "learning_rate": 9.473684210526316e-10,
+ "loss": 0.7325,
+ "step": 1000
+ },
+ {
+ "epoch": 142.86,
+ "eval_loss": 1.0276601314544678,
+ "eval_runtime": 477.2948,
+ "eval_samples_per_second": 1.073,
+ "eval_steps_per_second": 0.067,
+ "eval_wer": 66.00332929782083,
+ "step": 1000
+ },
+ {
+ "epoch": 146.43,
+ "learning_rate": 1.5046153846153844e-07,
+ "loss": 0.7238,
+ "step": 1025
+ },
+ {
+ "epoch": 150.0,
+ "learning_rate": 1.466153846153846e-07,
+ "loss": 0.7163,
+ "step": 1050
+ },
+ {
+ "epoch": 153.57,
+ "learning_rate": 1.4276923076923076e-07,
+ "loss": 0.7028,
+ "step": 1075
+ },
+ {
+ "epoch": 157.14,
+ "learning_rate": 1.389230769230769e-07,
+ "loss": 0.6952,
+ "step": 1100
+ },
+ {
+ "epoch": 157.14,
+ "eval_loss": 1.0121634006500244,
+ "eval_runtime": 471.3724,
+ "eval_samples_per_second": 1.086,
+ "eval_steps_per_second": 0.068,
+ "eval_wer": 65.05750605326877,
+ "step": 1100
+ },
+ {
+ "epoch": 160.71,
+ "learning_rate": 1.3507692307692308e-07,
+ "loss": 0.6843,
+ "step": 1125
+ },
+ {
+ "epoch": 164.29,
+ "learning_rate": 1.3123076923076923e-07,
+ "loss": 0.6699,
+ "step": 1150
+ },
+ {
+ "epoch": 167.86,
+ "learning_rate": 1.2738461538461538e-07,
+ "loss": 0.6671,
+ "step": 1175
+ },
+ {
+ "epoch": 171.43,
+ "learning_rate": 1.2353846153846153e-07,
+ "loss": 0.6531,
+ "step": 1200
+ },
+ {
+ "epoch": 171.43,
+ "eval_loss": 1.0014406442642212,
+ "eval_runtime": 475.4519,
+ "eval_samples_per_second": 1.077,
+ "eval_steps_per_second": 0.067,
+ "eval_wer": 64.42191283292978,
+ "step": 1200
+ },
+ {
+ "epoch": 175.0,
+ "learning_rate": 1.1969230769230767e-07,
+ "loss": 0.6487,
+ "step": 1225
+ },
+ {
+ "epoch": 178.57,
+ "learning_rate": 1.1584615384615385e-07,
+ "loss": 0.6369,
+ "step": 1250
+ },
+ {
+ "epoch": 182.14,
+ "learning_rate": 1.12e-07,
+ "loss": 0.6336,
+ "step": 1275
+ },
+ {
+ "epoch": 185.71,
+ "learning_rate": 1.0815384615384614e-07,
+ "loss": 0.6189,
+ "step": 1300
+ },
+ {
+ "epoch": 185.71,
+ "eval_loss": 0.9944669008255005,
+ "eval_runtime": 470.4039,
+ "eval_samples_per_second": 1.088,
+ "eval_steps_per_second": 0.068,
+ "eval_wer": 63.79388619854721,
+ "step": 1300
+ },
+ {
+ "epoch": 189.29,
+ "learning_rate": 1.043076923076923e-07,
+ "loss": 0.6213,
+ "step": 1325
+ },
+ {
+ "epoch": 192.86,
+ "learning_rate": 1.0046153846153845e-07,
+ "loss": 0.608,
+ "step": 1350
+ },
+ {
+ "epoch": 196.43,
+ "learning_rate": 9.66153846153846e-08,
+ "loss": 0.6029,
+ "step": 1375
+ },
+ {
+ "epoch": 200.0,
+ "learning_rate": 9.276923076923078e-08,
+ "loss": 0.5993,
+ "step": 1400
+ },
+ {
+ "epoch": 200.0,
+ "eval_loss": 0.9895604252815247,
+ "eval_runtime": 473.2431,
+ "eval_samples_per_second": 1.082,
+ "eval_steps_per_second": 0.068,
+ "eval_wer": 63.35502421307506,
+ "step": 1400
+ },
+ {
+ "epoch": 203.57,
+ "learning_rate": 8.892307692307692e-08,
+ "loss": 0.593,
+ "step": 1425
+ },
+ {
+ "epoch": 207.14,
+ "learning_rate": 8.507692307692307e-08,
+ "loss": 0.5817,
+ "step": 1450
+ },
+ {
+ "epoch": 210.71,
+ "learning_rate": 8.123076923076922e-08,
+ "loss": 0.5782,
+ "step": 1475
+ },
+ {
+ "epoch": 214.29,
+ "learning_rate": 7.738461538461538e-08,
+ "loss": 0.5757,
+ "step": 1500
+ },
+ {
+ "epoch": 214.29,
+ "eval_loss": 0.9864457845687866,
+ "eval_runtime": 474.414,
+ "eval_samples_per_second": 1.079,
+ "eval_steps_per_second": 0.067,
+ "eval_wer": 63.22639225181598,
+ "step": 1500
+ },
+ {
+ "epoch": 217.86,
+ "learning_rate": 7.353846153846153e-08,
+ "loss": 0.5706,
+ "step": 1525
+ },
+ {
+ "epoch": 221.43,
+ "learning_rate": 6.969230769230769e-08,
+ "loss": 0.5624,
+ "step": 1550
+ },
+ {
+ "epoch": 225.0,
+ "learning_rate": 6.584615384615385e-08,
+ "loss": 0.5638,
+ "step": 1575
+ },
+ {
+ "epoch": 228.57,
+ "learning_rate": 6.2e-08,
+ "loss": 0.5601,
+ "step": 1600
+ },
+ {
+ "epoch": 228.57,
+ "eval_loss": 0.9844600558280945,
+ "eval_runtime": 478.7212,
+ "eval_samples_per_second": 1.07,
+ "eval_steps_per_second": 0.067,
+ "eval_wer": 62.916162227602904,
+ "step": 1600
+ },
+ {
+ "epoch": 232.14,
+ "learning_rate": 5.815384615384615e-08,
+ "loss": 0.5537,
+ "step": 1625
+ },
+ {
+ "epoch": 235.71,
+ "learning_rate": 5.430769230769231e-08,
+ "loss": 0.5488,
+ "step": 1650
+ },
+ {
+ "epoch": 239.29,
+ "learning_rate": 5.0461538461538456e-08,
+ "loss": 0.5479,
+ "step": 1675
+ },
+ {
+ "epoch": 242.86,
+ "learning_rate": 4.661538461538461e-08,
+ "loss": 0.5482,
+ "step": 1700
+ },
+ {
+ "epoch": 242.86,
+ "eval_loss": 0.9833234548568726,
+ "eval_runtime": 488.3079,
+ "eval_samples_per_second": 1.049,
+ "eval_steps_per_second": 0.066,
+ "eval_wer": 62.817796610169495,
+ "step": 1700
+ },
+ {
+ "epoch": 246.43,
+ "learning_rate": 4.2769230769230765e-08,
+ "loss": 0.5441,
+ "step": 1725
+ },
+ {
+ "epoch": 250.0,
+ "learning_rate": 3.892307692307692e-08,
+ "loss": 0.54,
+ "step": 1750
+ },
+ {
+ "epoch": 253.57,
+ "learning_rate": 3.5076923076923074e-08,
+ "loss": 0.538,
+ "step": 1775
+ },
+ {
+ "epoch": 257.14,
+ "learning_rate": 3.123076923076923e-08,
+ "loss": 0.5382,
+ "step": 1800
+ },
+ {
+ "epoch": 257.14,
+ "eval_loss": 0.9826769828796387,
+ "eval_runtime": 486.3686,
+ "eval_samples_per_second": 1.053,
+ "eval_steps_per_second": 0.066,
+ "eval_wer": 62.84049636803874,
+ "step": 1800
+ },
+ {
+ "epoch": 260.71,
+ "learning_rate": 2.7384615384615387e-08,
+ "loss": 0.5343,
+ "step": 1825
+ },
+ {
+ "epoch": 264.29,
+ "learning_rate": 2.3538461538461535e-08,
+ "loss": 0.5313,
+ "step": 1850
+ },
+ {
+ "epoch": 267.86,
+ "learning_rate": 1.9692307692307693e-08,
+ "loss": 0.5318,
+ "step": 1875
+ },
+ {
+ "epoch": 271.43,
+ "learning_rate": 1.5846153846153844e-08,
+ "loss": 0.5325,
+ "step": 1900
+ },
+ {
+ "epoch": 271.43,
+ "eval_loss": 0.9823360443115234,
+ "eval_runtime": 483.6609,
+ "eval_samples_per_second": 1.059,
+ "eval_steps_per_second": 0.066,
+ "eval_wer": 62.76483050847458,
+ "step": 1900
+ },
+ {
+ "epoch": 275.0,
+ "learning_rate": 1.2e-08,
+ "loss": 0.529,
+ "step": 1925
+ },
+ {
+ "epoch": 278.57,
+ "learning_rate": 8.153846153846154e-09,
+ "loss": 0.5294,
+ "step": 1950
+ },
+ {
+ "epoch": 282.14,
+ "learning_rate": 4.307692307692307e-09,
+ "loss": 0.525,
+ "step": 1975
+ },
+ {
+ "epoch": 285.71,
+ "learning_rate": 4.615384615384615e-10,
+ "loss": 0.5287,
+ "step": 2000
+ },
+ {
+ "epoch": 285.71,
+ "eval_loss": 0.9822061061859131,
+ "eval_runtime": 484.044,
+ "eval_samples_per_second": 1.058,
+ "eval_steps_per_second": 0.066,
+ "eval_wer": 62.817796610169495,
+ "step": 2000
+ },
+ {
+ "epoch": 289.29,
+ "learning_rate": 1.9853333333333334e-07,
+ "loss": 0.523,
+ "step": 2025
+ },
+ {
+ "epoch": 292.86,
+ "learning_rate": 1.9686666666666667e-07,
+ "loss": 0.5133,
+ "step": 2050
+ },
+ {
+ "epoch": 296.43,
+ "learning_rate": 1.9519999999999997e-07,
+ "loss": 0.4999,
+ "step": 2075
+ },
+ {
+ "epoch": 300.0,
+ "learning_rate": 1.935333333333333e-07,
+ "loss": 0.4924,
+ "step": 2100
+ },
+ {
+ "epoch": 303.57,
+ "learning_rate": 1.9186666666666666e-07,
+ "loss": 0.4855,
+ "step": 2125
+ },
+ {
+ "epoch": 307.14,
+ "learning_rate": 1.902e-07,
+ "loss": 0.4701,
+ "step": 2150
+ },
+ {
+ "epoch": 310.71,
+ "learning_rate": 1.8853333333333333e-07,
+ "loss": 0.4601,
+ "step": 2175
+ },
+ {
+ "epoch": 314.29,
+ "learning_rate": 1.8686666666666669e-07,
+ "loss": 0.4525,
+ "step": 2200
+ },
+ {
+ "epoch": 317.86,
+ "learning_rate": 1.852e-07,
+ "loss": 0.4448,
+ "step": 2225
+ },
+ {
+ "epoch": 321.43,
+ "learning_rate": 1.8353333333333332e-07,
+ "loss": 0.4364,
+ "step": 2250
+ },
+ {
+ "epoch": 325.0,
+ "learning_rate": 1.8186666666666665e-07,
+ "loss": 0.4232,
+ "step": 2275
+ },
+ {
+ "epoch": 328.57,
+ "learning_rate": 1.8019999999999999e-07,
+ "loss": 0.4163,
+ "step": 2300
+ },
+ {
+ "epoch": 332.14,
+ "learning_rate": 1.7853333333333334e-07,
+ "loss": 0.4089,
+ "step": 2325
+ },
+ {
+ "epoch": 335.71,
+ "learning_rate": 1.7686666666666668e-07,
+ "loss": 0.4031,
+ "step": 2350
+ },
+ {
+ "epoch": 339.29,
+ "learning_rate": 1.7519999999999998e-07,
+ "loss": 0.3887,
+ "step": 2375
+ },
+ {
+ "epoch": 342.86,
+ "learning_rate": 1.735333333333333e-07,
+ "loss": 0.3826,
+ "step": 2400
+ },
+ {
+ "epoch": 346.43,
+ "learning_rate": 1.7186666666666667e-07,
+ "loss": 0.3766,
+ "step": 2425
+ },
+ {
+ "epoch": 350.0,
+ "learning_rate": 1.702e-07,
+ "loss": 0.3647,
+ "step": 2450
+ },
+ {
+ "epoch": 353.57,
+ "learning_rate": 1.6853333333333333e-07,
+ "loss": 0.3601,
+ "step": 2475
+ },
+ {
+ "epoch": 357.14,
+ "learning_rate": 1.6686666666666664e-07,
+ "loss": 0.3494,
+ "step": 2500
+ },
+ {
+ "epoch": 357.14,
+ "eval_loss": 1.0025562047958374,
+ "eval_runtime": 489.174,
+ "eval_samples_per_second": 1.047,
+ "eval_steps_per_second": 0.065,
+ "eval_wer": 61.61470944309927,
+ "step": 2500
+ },
+ {
+ "epoch": 360.71,
+ "learning_rate": 1.652e-07,
+ "loss": 0.3448,
+ "step": 2525
+ },
+ {
+ "epoch": 364.29,
+ "learning_rate": 1.6353333333333333e-07,
+ "loss": 0.3367,
+ "step": 2550
+ },
+ {
+ "epoch": 367.86,
+ "learning_rate": 1.6186666666666666e-07,
+ "loss": 0.3308,
+ "step": 2575
+ },
+ {
+ "epoch": 371.43,
+ "learning_rate": 1.602e-07,
+ "loss": 0.3226,
+ "step": 2600
+ },
+ {
+ "epoch": 375.0,
+ "learning_rate": 1.5853333333333335e-07,
+ "loss": 0.3165,
+ "step": 2625
+ },
+ {
+ "epoch": 378.57,
+ "learning_rate": 1.5686666666666666e-07,
+ "loss": 0.3099,
+ "step": 2650
+ },
+ {
+ "epoch": 382.14,
+ "learning_rate": 1.552e-07,
+ "loss": 0.3021,
+ "step": 2675
+ },
+ {
+ "epoch": 385.71,
+ "learning_rate": 1.5353333333333332e-07,
+ "loss": 0.2964,
+ "step": 2700
+ },
+ {
+ "epoch": 389.29,
+ "learning_rate": 1.5186666666666668e-07,
+ "loss": 0.2901,
+ "step": 2725
+ },
+ {
+ "epoch": 392.86,
+ "learning_rate": 1.502e-07,
+ "loss": 0.284,
+ "step": 2750
+ },
+ {
+ "epoch": 396.43,
+ "learning_rate": 1.4853333333333334e-07,
+ "loss": 0.279,
+ "step": 2775
+ },
+ {
+ "epoch": 400.0,
+ "learning_rate": 1.4686666666666667e-07,
+ "loss": 0.2715,
+ "step": 2800
+ },
+ {
+ "epoch": 403.57,
+ "learning_rate": 1.4519999999999998e-07,
+ "loss": 0.2646,
+ "step": 2825
+ },
+ {
+ "epoch": 407.14,
+ "learning_rate": 1.4353333333333333e-07,
+ "loss": 0.2606,
+ "step": 2850
+ },
+ {
+ "epoch": 410.71,
+ "learning_rate": 1.4186666666666667e-07,
+ "loss": 0.2564,
+ "step": 2875
+ },
+ {
+ "epoch": 414.29,
+ "learning_rate": 1.402e-07,
+ "loss": 0.2486,
+ "step": 2900
+ },
+ {
+ "epoch": 417.86,
+ "learning_rate": 1.3853333333333333e-07,
+ "loss": 0.2463,
+ "step": 2925
+ },
+ {
+ "epoch": 421.43,
+ "learning_rate": 1.3686666666666666e-07,
+ "loss": 0.239,
+ "step": 2950
+ },
+ {
+ "epoch": 425.0,
+ "learning_rate": 1.352e-07,
+ "loss": 0.2341,
+ "step": 2975
+ },
+ {
+ "epoch": 428.57,
+ "learning_rate": 1.3353333333333332e-07,
+ "loss": 0.2287,
+ "step": 3000
+ },
+ {
+ "epoch": 428.57,
+ "eval_loss": 1.0533033609390259,
+ "eval_runtime": 465.8233,
+ "eval_samples_per_second": 1.099,
+ "eval_steps_per_second": 0.069,
+ "eval_wer": 61.516343825665864,
+ "step": 3000
+ }
+ ],
+ "max_steps": 5000,
+ "num_train_epochs": 715,
+ "total_flos": 5.330864948871168e+19,
+ "trial_name": null,
+ "trial_params": null
+ }
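trainer_state.json carries the full training log, including the WER measured at each evaluation. A minimal sketch (standard library only, path illustrative) for pulling the evaluation records out of log_history and confirming which step had the lowest WER:

import json

with open("checkpoint-3000/trainer_state.json") as fh:  # hypothetical local path
    state = json.load(fh)

# Evaluation records are the log_history entries that carry an "eval_wer" key.
evals = [e for e in state["log_history"] if "eval_wer" in e]
best = min(evals, key=lambda e: e["eval_wer"])
print(best["step"], best["eval_wer"])  # in this log: step 3000, WER ~61.52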
checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d15d72002cecf8323f6cd941cec892fce6b19d6e69aa1365a60139bd64d32ff
+ size 3579