infinitejoy commited on
Commit
b44f7f3
1 Parent(s): edb8379

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.6831369996070862,
4
- "eval_runtime": 55.7549,
5
  "eval_samples": 1703,
6
- "eval_samples_per_second": 30.544,
7
- "eval_steps_per_second": 0.969,
8
- "eval_wer": 0.4286635944700461,
9
- "train_loss": 0.9041624231254105,
10
- "train_runtime": 18573.3759,
11
  "train_samples": 3601,
12
- "train_samples_per_second": 19.388,
13
- "train_steps_per_second": 0.608
14
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.659227728843689,
4
+ "eval_runtime": 51.9908,
5
  "eval_samples": 1703,
6
+ "eval_samples_per_second": 32.756,
7
+ "eval_steps_per_second": 1.039,
8
+ "eval_wer": 0.45640552995391703,
9
+ "train_loss": 0.9130245208740234,
10
+ "train_runtime": 17767.5681,
11
  "train_samples": 3601,
12
+ "train_samples_per_second": 20.267,
13
+ "train_steps_per_second": 0.636
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_loss": 0.6831369996070862,
4
- "eval_runtime": 55.7549,
5
  "eval_samples": 1703,
6
- "eval_samples_per_second": 30.544,
7
- "eval_steps_per_second": 0.969,
8
- "eval_wer": 0.4286635944700461
9
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_loss": 0.659227728843689,
4
+ "eval_runtime": 51.9908,
5
  "eval_samples": 1703,
6
+ "eval_samples_per_second": 32.756,
7
+ "eval_steps_per_second": 1.039,
8
+ "eval_wer": 0.45640552995391703
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5301fb5708bea0c9a55fac6bf9ab02b82cb49965ed803e96ab4ddae579b8b706
3
  size 1262136881
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04dc2d5e12b690a836e89132ee1906bc07361f91b36f64bf93b25dc84b5b8a23
3
  size 1262136881
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 0.9041624231254105,
4
- "train_runtime": 18573.3759,
5
  "train_samples": 3601,
6
- "train_samples_per_second": 19.388,
7
- "train_steps_per_second": 0.608
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 0.9130245208740234,
4
+ "train_runtime": 17767.5681,
5
  "train_samples": 3601,
6
+ "train_samples_per_second": 20.267,
7
+ "train_steps_per_second": 0.636
8
  }
trainer_state.json CHANGED
@@ -10,892 +10,892 @@
10
  {
11
  "epoch": 0.88,
12
  "learning_rate": 5.82e-05,
13
- "loss": 10.446,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.77,
18
  "learning_rate": 0.0001182,
19
- "loss": 3.652,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 2.65,
24
  "learning_rate": 0.00017819999999999997,
25
- "loss": 3.2102,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 3.54,
30
  "learning_rate": 0.0002382,
31
- "loss": 3.1322,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 4.42,
36
  "learning_rate": 0.0002982,
37
- "loss": 3.0798,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 4.42,
42
- "eval_loss": 3.001023054122925,
43
- "eval_runtime": 56.1867,
44
- "eval_samples_per_second": 30.31,
45
- "eval_steps_per_second": 0.961,
46
- "eval_wer": 1.0011981566820276,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 5.31,
51
  "learning_rate": 0.00029730555555555554,
52
- "loss": 2.5544,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 6.19,
57
  "learning_rate": 0.0002945277777777777,
58
- "loss": 1.8568,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 7.08,
63
  "learning_rate": 0.00029174999999999996,
64
- "loss": 1.6575,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 7.96,
69
  "learning_rate": 0.0002889722222222222,
70
- "loss": 1.5125,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 8.85,
75
  "learning_rate": 0.0002861944444444444,
76
- "loss": 1.4336,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 8.85,
81
- "eval_loss": 0.8481376767158508,
82
- "eval_runtime": 54.9915,
83
- "eval_samples_per_second": 30.968,
84
- "eval_steps_per_second": 0.982,
85
- "eval_wer": 0.6910599078341014,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 9.73,
90
  "learning_rate": 0.0002834166666666666,
91
- "loss": 1.3709,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 10.62,
96
  "learning_rate": 0.00028063888888888886,
97
- "loss": 1.3058,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 11.5,
102
  "learning_rate": 0.0002778611111111111,
103
- "loss": 1.2604,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 12.39,
108
  "learning_rate": 0.0002750833333333333,
109
- "loss": 1.2065,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 13.27,
114
  "learning_rate": 0.00027230555555555553,
115
- "loss": 1.2062,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 13.27,
120
- "eval_loss": 0.7312180995941162,
121
- "eval_runtime": 55.5977,
122
- "eval_samples_per_second": 30.631,
123
- "eval_steps_per_second": 0.971,
124
- "eval_wer": 0.6332718894009216,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 14.16,
129
  "learning_rate": 0.00026952777777777777,
130
- "loss": 1.1712,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 15.04,
135
  "learning_rate": 0.00026674999999999995,
136
- "loss": 1.1348,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 15.93,
141
  "learning_rate": 0.0002639722222222222,
142
- "loss": 1.1077,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 16.81,
147
  "learning_rate": 0.00026119444444444443,
148
- "loss": 1.0821,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 17.7,
153
  "learning_rate": 0.0002584166666666666,
154
- "loss": 1.0481,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 17.7,
159
- "eval_loss": 0.6849815845489502,
160
- "eval_runtime": 54.2403,
161
- "eval_samples_per_second": 31.397,
162
- "eval_steps_per_second": 0.996,
163
- "eval_wer": 0.5358525345622119,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 18.58,
168
  "learning_rate": 0.0002556388888888889,
169
- "loss": 1.0495,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 19.47,
174
  "learning_rate": 0.0002528611111111111,
175
- "loss": 1.0119,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 20.35,
180
  "learning_rate": 0.0002500833333333333,
181
- "loss": 1.0156,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 21.24,
186
  "learning_rate": 0.00024730555555555557,
187
- "loss": 0.9916,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 22.12,
192
  "learning_rate": 0.00024452777777777776,
193
- "loss": 0.9837,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 22.12,
198
- "eval_loss": 0.6336787343025208,
199
- "eval_runtime": 55.0498,
200
- "eval_samples_per_second": 30.936,
201
- "eval_steps_per_second": 0.981,
202
- "eval_wer": 0.5316129032258065,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 23.01,
207
  "learning_rate": 0.00024174999999999997,
208
- "loss": 0.9558,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 23.89,
213
  "learning_rate": 0.0002389722222222222,
214
- "loss": 0.9523,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 24.78,
219
  "learning_rate": 0.00023619444444444442,
220
- "loss": 0.946,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 25.66,
225
  "learning_rate": 0.00023341666666666663,
226
- "loss": 0.909,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 26.55,
231
  "learning_rate": 0.00023063888888888887,
232
- "loss": 0.9108,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 26.55,
237
- "eval_loss": 0.6257887482643127,
238
- "eval_runtime": 55.6823,
239
- "eval_samples_per_second": 30.584,
240
- "eval_steps_per_second": 0.97,
241
- "eval_wer": 0.507926267281106,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 27.43,
246
  "learning_rate": 0.00022786111111111108,
247
- "loss": 0.9005,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 28.32,
252
  "learning_rate": 0.00022508333333333332,
253
- "loss": 0.9069,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 29.2,
258
  "learning_rate": 0.00022230555555555553,
259
- "loss": 0.8605,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 30.09,
264
  "learning_rate": 0.00021952777777777774,
265
- "loss": 0.8815,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 30.97,
270
  "learning_rate": 0.00021674999999999998,
271
- "loss": 0.8439,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 30.97,
276
- "eval_loss": 0.6301265954971313,
277
- "eval_runtime": 54.5193,
278
- "eval_samples_per_second": 31.237,
279
- "eval_steps_per_second": 0.99,
280
- "eval_wer": 0.48884792626728113,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 31.86,
285
  "learning_rate": 0.0002139722222222222,
286
- "loss": 0.8522,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 32.74,
291
  "learning_rate": 0.0002111944444444444,
292
- "loss": 0.8477,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 33.63,
297
  "learning_rate": 0.00020841666666666665,
298
- "loss": 0.7978,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 34.51,
303
  "learning_rate": 0.00020563888888888886,
304
- "loss": 0.8127,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 35.4,
309
  "learning_rate": 0.0002028611111111111,
310
- "loss": 0.7901,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 35.4,
315
- "eval_loss": 0.6244927048683167,
316
- "eval_runtime": 53.6992,
317
- "eval_samples_per_second": 31.714,
318
- "eval_steps_per_second": 1.006,
319
- "eval_wer": 0.4976958525345622,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 36.28,
324
- "learning_rate": 0.0002000833333333333,
325
- "loss": 0.7978,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 37.17,
330
- "learning_rate": 0.00019730555555555552,
331
- "loss": 0.8046,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 38.05,
336
- "learning_rate": 0.00019452777777777776,
337
- "loss": 0.7892,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 38.94,
342
- "learning_rate": 0.00019174999999999997,
343
- "loss": 0.7657,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 39.82,
348
  "learning_rate": 0.00018899999999999999,
349
- "loss": 0.7669,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 39.82,
354
- "eval_loss": 0.6164370775222778,
355
- "eval_runtime": 54.5713,
356
- "eval_samples_per_second": 31.207,
357
- "eval_steps_per_second": 0.99,
358
- "eval_wer": 0.4671889400921659,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 40.71,
363
  "learning_rate": 0.0001862222222222222,
364
- "loss": 0.7389,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 41.59,
369
  "learning_rate": 0.00018344444444444444,
370
- "loss": 0.7595,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 42.48,
375
  "learning_rate": 0.00018066666666666665,
376
- "loss": 0.7264,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 43.36,
381
  "learning_rate": 0.00017788888888888886,
382
- "loss": 0.7251,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 44.25,
387
  "learning_rate": 0.0001751111111111111,
388
- "loss": 0.7196,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 44.25,
393
- "eval_loss": 0.6039230227470398,
394
- "eval_runtime": 55.8782,
395
- "eval_samples_per_second": 30.477,
396
- "eval_steps_per_second": 0.966,
397
- "eval_wer": 0.4688479262672811,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 45.13,
402
  "learning_rate": 0.0001723333333333333,
403
- "loss": 0.6932,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 46.02,
408
  "learning_rate": 0.00016955555555555555,
409
- "loss": 0.6983,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 46.9,
414
  "learning_rate": 0.00016677777777777776,
415
- "loss": 0.6769,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 47.79,
420
  "learning_rate": 0.00016399999999999997,
421
- "loss": 0.6928,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 48.67,
426
  "learning_rate": 0.00016122222222222221,
427
- "loss": 0.6715,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 48.67,
432
- "eval_loss": 0.5900057554244995,
433
- "eval_runtime": 53.576,
434
- "eval_samples_per_second": 31.787,
435
- "eval_steps_per_second": 1.008,
436
- "eval_wer": 0.45732718894009217,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 49.56,
441
- "learning_rate": 0.00015849999999999998,
442
- "loss": 0.6833,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 50.44,
447
- "learning_rate": 0.0001557222222222222,
448
- "loss": 0.6673,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 51.33,
453
- "learning_rate": 0.00015294444444444443,
454
- "loss": 0.6791,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 52.21,
459
- "learning_rate": 0.00015016666666666664,
460
- "loss": 0.6292,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 53.1,
465
- "learning_rate": 0.00014738888888888888,
466
- "loss": 0.6441,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 53.1,
471
- "eval_loss": 0.7002069354057312,
472
- "eval_runtime": 55.3769,
473
- "eval_samples_per_second": 30.753,
474
- "eval_steps_per_second": 0.975,
475
- "eval_wer": 0.479815668202765,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 53.98,
480
- "learning_rate": 0.0001446111111111111,
481
- "loss": 0.6217,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 54.87,
486
- "learning_rate": 0.00014183333333333333,
487
- "loss": 0.6131,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 55.75,
492
- "learning_rate": 0.00013905555555555554,
493
- "loss": 0.6211,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 56.64,
498
- "learning_rate": 0.00013627777777777775,
499
- "loss": 0.6104,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 57.52,
504
- "learning_rate": 0.0001335,
505
- "loss": 0.5938,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 57.52,
510
- "eval_loss": 0.6249451637268066,
511
- "eval_runtime": 55.668,
512
- "eval_samples_per_second": 30.592,
513
- "eval_steps_per_second": 0.97,
514
- "eval_wer": 0.4578801843317972,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 58.41,
519
  "learning_rate": 0.0001307222222222222,
520
- "loss": 0.6015,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 59.29,
525
  "learning_rate": 0.00012794444444444442,
526
- "loss": 0.591,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 60.18,
531
  "learning_rate": 0.00012516666666666666,
532
- "loss": 0.5734,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 61.06,
537
  "learning_rate": 0.0001223888888888889,
538
- "loss": 0.5685,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 61.95,
543
  "learning_rate": 0.0001196111111111111,
544
- "loss": 0.5541,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 61.95,
549
- "eval_loss": 0.6184473633766174,
550
- "eval_runtime": 55.2876,
551
- "eval_samples_per_second": 30.803,
552
- "eval_steps_per_second": 0.977,
553
- "eval_wer": 0.4424884792626728,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 62.83,
558
  "learning_rate": 0.00011683333333333332,
559
- "loss": 0.5546,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 63.72,
564
  "learning_rate": 0.00011405555555555554,
565
- "loss": 0.5473,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 64.6,
570
  "learning_rate": 0.00011127777777777777,
571
- "loss": 0.5592,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 65.49,
576
  "learning_rate": 0.0001085,
577
- "loss": 0.5349,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 66.37,
582
  "learning_rate": 0.0001057222222222222,
583
- "loss": 0.5506,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 66.37,
588
- "eval_loss": 0.6962713003158569,
589
- "eval_runtime": 55.4374,
590
- "eval_samples_per_second": 30.719,
591
- "eval_steps_per_second": 0.974,
592
- "eval_wer": 0.45852534562211983,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 67.26,
597
  "learning_rate": 0.00010294444444444443,
598
- "loss": 0.5313,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 68.14,
603
  "learning_rate": 0.00010016666666666666,
604
- "loss": 0.5267,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 69.03,
609
  "learning_rate": 9.738888888888888e-05,
610
- "loss": 0.5222,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 69.91,
615
  "learning_rate": 9.46111111111111e-05,
616
- "loss": 0.5101,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 70.8,
621
  "learning_rate": 9.183333333333332e-05,
622
- "loss": 0.4998,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 70.8,
627
- "eval_loss": 0.6778160333633423,
628
- "eval_runtime": 56.1738,
629
- "eval_samples_per_second": 30.317,
630
- "eval_steps_per_second": 0.961,
631
- "eval_wer": 0.44682027649769585,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 71.68,
636
  "learning_rate": 8.905555555555555e-05,
637
- "loss": 0.4941,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 72.57,
642
  "learning_rate": 8.627777777777776e-05,
643
- "loss": 0.492,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 73.45,
648
  "learning_rate": 8.349999999999998e-05,
649
- "loss": 0.4741,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 74.34,
654
  "learning_rate": 8.072222222222222e-05,
655
- "loss": 0.495,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 75.22,
660
- "learning_rate": 7.794444444444445e-05,
661
- "loss": 0.4729,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 75.22,
666
- "eval_loss": 0.6383044719696045,
667
- "eval_runtime": 53.9538,
668
- "eval_samples_per_second": 31.564,
669
- "eval_steps_per_second": 1.001,
670
- "eval_wer": 0.4392626728110599,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 76.11,
675
- "learning_rate": 7.516666666666665e-05,
676
- "loss": 0.4696,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 76.99,
681
- "learning_rate": 7.238888888888889e-05,
682
- "loss": 0.4581,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 77.88,
687
- "learning_rate": 6.961111111111111e-05,
688
- "loss": 0.4583,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 78.76,
693
- "learning_rate": 6.683333333333332e-05,
694
- "loss": 0.4451,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 79.65,
699
- "learning_rate": 6.405555555555555e-05,
700
- "loss": 0.4535,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 79.65,
705
- "eval_loss": 0.6592639684677124,
706
- "eval_runtime": 53.7527,
707
- "eval_samples_per_second": 31.682,
708
- "eval_steps_per_second": 1.005,
709
- "eval_wer": 0.4368663594470046,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 80.53,
714
  "learning_rate": 6.130555555555555e-05,
715
- "loss": 0.4324,
716
  "step": 9100
717
  },
718
  {
719
  "epoch": 81.42,
720
  "learning_rate": 5.8527777777777774e-05,
721
- "loss": 0.4546,
722
  "step": 9200
723
  },
724
  {
725
  "epoch": 82.3,
726
  "learning_rate": 5.574999999999999e-05,
727
- "loss": 0.4391,
728
  "step": 9300
729
  },
730
  {
731
  "epoch": 83.19,
732
  "learning_rate": 5.297222222222222e-05,
733
- "loss": 0.4306,
734
  "step": 9400
735
  },
736
  {
737
  "epoch": 84.07,
738
  "learning_rate": 5.019444444444444e-05,
739
- "loss": 0.4358,
740
  "step": 9500
741
  },
742
  {
743
  "epoch": 84.07,
744
- "eval_loss": 0.6913911700248718,
745
- "eval_runtime": 55.4673,
746
- "eval_samples_per_second": 30.703,
747
- "eval_steps_per_second": 0.974,
748
- "eval_wer": 0.4422119815668203,
749
  "step": 9500
750
  },
751
  {
752
  "epoch": 84.96,
753
  "learning_rate": 4.741666666666666e-05,
754
- "loss": 0.4095,
755
  "step": 9600
756
  },
757
  {
758
  "epoch": 85.84,
759
  "learning_rate": 4.463888888888888e-05,
760
- "loss": 0.4148,
761
  "step": 9700
762
  },
763
  {
764
  "epoch": 86.73,
765
  "learning_rate": 4.186111111111111e-05,
766
- "loss": 0.4113,
767
  "step": 9800
768
  },
769
  {
770
  "epoch": 87.61,
771
  "learning_rate": 3.9083333333333326e-05,
772
- "loss": 0.405,
773
  "step": 9900
774
  },
775
  {
776
  "epoch": 88.5,
777
  "learning_rate": 3.630555555555555e-05,
778
- "loss": 0.402,
779
  "step": 10000
780
  },
781
  {
782
  "epoch": 88.5,
783
- "eval_loss": 0.6743763089179993,
784
- "eval_runtime": 53.7113,
785
- "eval_samples_per_second": 31.707,
786
- "eval_steps_per_second": 1.005,
787
- "eval_wer": 0.4269124423963134,
788
  "step": 10000
789
  },
790
  {
791
  "epoch": 89.38,
792
  "learning_rate": 3.352777777777777e-05,
793
- "loss": 0.3915,
794
  "step": 10100
795
  },
796
  {
797
  "epoch": 90.27,
798
  "learning_rate": 3.0749999999999995e-05,
799
- "loss": 0.394,
800
  "step": 10200
801
  },
802
  {
803
  "epoch": 91.15,
804
  "learning_rate": 2.7972222222222217e-05,
805
- "loss": 0.392,
806
  "step": 10300
807
  },
808
  {
809
  "epoch": 92.04,
810
  "learning_rate": 2.519444444444444e-05,
811
- "loss": 0.3937,
812
  "step": 10400
813
  },
814
  {
815
  "epoch": 92.92,
816
  "learning_rate": 2.2416666666666665e-05,
817
- "loss": 0.3946,
818
  "step": 10500
819
  },
820
  {
821
  "epoch": 92.92,
822
- "eval_loss": 0.6894700527191162,
823
- "eval_runtime": 55.0145,
824
- "eval_samples_per_second": 30.955,
825
- "eval_steps_per_second": 0.982,
826
- "eval_wer": 0.42746543778801843,
827
  "step": 10500
828
  },
829
  {
830
  "epoch": 93.81,
831
  "learning_rate": 1.9638888888888887e-05,
832
- "loss": 0.3881,
833
  "step": 10600
834
  },
835
  {
836
  "epoch": 94.69,
837
  "learning_rate": 1.686111111111111e-05,
838
- "loss": 0.3778,
839
  "step": 10700
840
  },
841
  {
842
  "epoch": 95.58,
843
  "learning_rate": 1.4083333333333331e-05,
844
- "loss": 0.3783,
845
  "step": 10800
846
  },
847
  {
848
  "epoch": 96.46,
849
  "learning_rate": 1.1305555555555553e-05,
850
- "loss": 0.3612,
851
  "step": 10900
852
  },
853
  {
854
  "epoch": 97.35,
855
  "learning_rate": 8.527777777777777e-06,
856
- "loss": 0.3734,
857
  "step": 11000
858
  },
859
  {
860
  "epoch": 97.35,
861
- "eval_loss": 0.6888979077339172,
862
- "eval_runtime": 55.289,
863
- "eval_samples_per_second": 30.802,
864
- "eval_steps_per_second": 0.977,
865
- "eval_wer": 0.4319815668202765,
866
  "step": 11000
867
  },
868
  {
869
  "epoch": 98.23,
870
  "learning_rate": 5.749999999999999e-06,
871
- "loss": 0.3635,
872
  "step": 11100
873
  },
874
  {
875
  "epoch": 99.12,
876
- "learning_rate": 2.9999999999999997e-06,
877
- "loss": 0.3775,
878
  "step": 11200
879
  },
880
  {
881
  "epoch": 100.0,
882
- "learning_rate": 2.222222222222222e-07,
883
- "loss": 0.3737,
884
  "step": 11300
885
  },
886
  {
887
  "epoch": 100.0,
888
  "step": 11300,
889
- "total_flos": 4.438667922365353e+19,
890
- "train_loss": 0.9041624231254105,
891
- "train_runtime": 18573.3759,
892
- "train_samples_per_second": 19.388,
893
- "train_steps_per_second": 0.608
894
  }
895
  ],
896
  "max_steps": 11300,
897
  "num_train_epochs": 100,
898
- "total_flos": 4.438667922365353e+19,
899
  "trial_name": null,
900
  "trial_params": null
901
  }
 
10
  {
11
  "epoch": 0.88,
12
  "learning_rate": 5.82e-05,
13
+ "loss": 10.5082,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.77,
18
  "learning_rate": 0.0001182,
19
+ "loss": 3.6378,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 2.65,
24
  "learning_rate": 0.00017819999999999997,
25
+ "loss": 3.2141,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 3.54,
30
  "learning_rate": 0.0002382,
31
+ "loss": 3.1644,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 4.42,
36
  "learning_rate": 0.0002982,
37
+ "loss": 3.0928,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 4.42,
42
+ "eval_loss": 3.0804343223571777,
43
+ "eval_runtime": 51.9529,
44
+ "eval_samples_per_second": 32.78,
45
+ "eval_steps_per_second": 1.039,
46
+ "eval_wer": 1.0072811059907834,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 5.31,
51
  "learning_rate": 0.00029730555555555554,
52
+ "loss": 2.7805,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 6.19,
57
  "learning_rate": 0.0002945277777777777,
58
+ "loss": 1.9414,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 7.08,
63
  "learning_rate": 0.00029174999999999996,
64
+ "loss": 1.6707,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 7.96,
69
  "learning_rate": 0.0002889722222222222,
70
+ "loss": 1.5232,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 8.85,
75
  "learning_rate": 0.0002861944444444444,
76
+ "loss": 1.4505,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 8.85,
81
+ "eval_loss": 0.9038452506065369,
82
+ "eval_runtime": 51.3716,
83
+ "eval_samples_per_second": 33.151,
84
+ "eval_steps_per_second": 1.051,
85
+ "eval_wer": 0.7329953917050691,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 9.73,
90
  "learning_rate": 0.0002834166666666666,
91
+ "loss": 1.3908,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 10.62,
96
  "learning_rate": 0.00028063888888888886,
97
+ "loss": 1.3335,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 11.5,
102
  "learning_rate": 0.0002778611111111111,
103
+ "loss": 1.2725,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 12.39,
108
  "learning_rate": 0.0002750833333333333,
109
+ "loss": 1.2043,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 13.27,
114
  "learning_rate": 0.00027230555555555553,
115
+ "loss": 1.2207,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 13.27,
120
+ "eval_loss": 0.7374930381774902,
121
+ "eval_runtime": 51.4675,
122
+ "eval_samples_per_second": 33.089,
123
+ "eval_steps_per_second": 1.049,
124
+ "eval_wer": 0.604516129032258,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 14.16,
129
  "learning_rate": 0.00026952777777777777,
130
+ "loss": 1.1794,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 15.04,
135
  "learning_rate": 0.00026674999999999995,
136
+ "loss": 1.1502,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 15.93,
141
  "learning_rate": 0.0002639722222222222,
142
+ "loss": 1.1261,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 16.81,
147
  "learning_rate": 0.00026119444444444443,
148
+ "loss": 1.1082,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 17.7,
153
  "learning_rate": 0.0002584166666666666,
154
+ "loss": 1.0695,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 17.7,
159
+ "eval_loss": 0.7119461894035339,
160
+ "eval_runtime": 51.1988,
161
+ "eval_samples_per_second": 33.263,
162
+ "eval_steps_per_second": 1.055,
163
+ "eval_wer": 0.544147465437788,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 18.58,
168
  "learning_rate": 0.0002556388888888889,
169
+ "loss": 1.064,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 19.47,
174
  "learning_rate": 0.0002528611111111111,
175
+ "loss": 1.0327,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 20.35,
180
  "learning_rate": 0.0002500833333333333,
181
+ "loss": 1.0353,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 21.24,
186
  "learning_rate": 0.00024730555555555557,
187
+ "loss": 1.0108,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 22.12,
192
  "learning_rate": 0.00024452777777777776,
193
+ "loss": 1.0104,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 22.12,
198
+ "eval_loss": 0.6069046258926392,
199
+ "eval_runtime": 51.5986,
200
+ "eval_samples_per_second": 33.005,
201
+ "eval_steps_per_second": 1.047,
202
+ "eval_wer": 0.5295852534562212,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 23.01,
207
  "learning_rate": 0.00024174999999999997,
208
+ "loss": 0.9806,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 23.89,
213
  "learning_rate": 0.0002389722222222222,
214
+ "loss": 0.9712,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 24.78,
219
  "learning_rate": 0.00023619444444444442,
220
+ "loss": 0.9703,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 25.66,
225
  "learning_rate": 0.00023341666666666663,
226
+ "loss": 0.9298,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 26.55,
231
  "learning_rate": 0.00023063888888888887,
232
+ "loss": 0.9299,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 26.55,
237
+ "eval_loss": 0.6168299913406372,
238
+ "eval_runtime": 51.1346,
239
+ "eval_samples_per_second": 33.304,
240
+ "eval_steps_per_second": 1.056,
241
+ "eval_wer": 0.5206451612903226,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 27.43,
246
  "learning_rate": 0.00022786111111111108,
247
+ "loss": 0.9159,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 28.32,
252
  "learning_rate": 0.00022508333333333332,
253
+ "loss": 0.922,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 29.2,
258
  "learning_rate": 0.00022230555555555553,
259
+ "loss": 0.8775,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 30.09,
264
  "learning_rate": 0.00021952777777777774,
265
+ "loss": 0.889,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 30.97,
270
  "learning_rate": 0.00021674999999999998,
271
+ "loss": 0.8588,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 30.97,
276
+ "eval_loss": 0.6382256150245667,
277
+ "eval_runtime": 50.9107,
278
+ "eval_samples_per_second": 33.451,
279
+ "eval_steps_per_second": 1.061,
280
+ "eval_wer": 0.5170506912442396,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 31.86,
285
  "learning_rate": 0.0002139722222222222,
286
+ "loss": 0.865,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 32.74,
291
  "learning_rate": 0.0002111944444444444,
292
+ "loss": 0.8679,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 33.63,
297
  "learning_rate": 0.00020841666666666665,
298
+ "loss": 0.8034,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 34.51,
303
  "learning_rate": 0.00020563888888888886,
304
+ "loss": 0.7997,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 35.4,
309
  "learning_rate": 0.0002028611111111111,
310
+ "loss": 0.7942,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 35.4,
315
+ "eval_loss": 0.6048025488853455,
316
+ "eval_runtime": 50.6633,
317
+ "eval_samples_per_second": 33.614,
318
+ "eval_steps_per_second": 1.066,
319
+ "eval_wer": 0.49880184331797234,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 36.28,
324
+ "learning_rate": 0.00020011111111111108,
325
+ "loss": 0.796,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 37.17,
330
+ "learning_rate": 0.00019733333333333332,
331
+ "loss": 0.8013,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 38.05,
336
+ "learning_rate": 0.00019455555555555554,
337
+ "loss": 0.7807,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 38.94,
342
+ "learning_rate": 0.00019177777777777777,
343
+ "loss": 0.7579,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 39.82,
348
  "learning_rate": 0.00018899999999999999,
349
+ "loss": 0.7808,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 39.82,
354
+ "eval_loss": 0.6730400919914246,
355
+ "eval_runtime": 50.5688,
356
+ "eval_samples_per_second": 33.677,
357
+ "eval_steps_per_second": 1.068,
358
+ "eval_wer": 0.5083870967741936,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 40.71,
363
  "learning_rate": 0.0001862222222222222,
364
+ "loss": 0.7556,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 41.59,
369
  "learning_rate": 0.00018344444444444444,
370
+ "loss": 0.767,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 42.48,
375
  "learning_rate": 0.00018066666666666665,
376
+ "loss": 0.7401,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 43.36,
381
  "learning_rate": 0.00017788888888888886,
382
+ "loss": 0.7413,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 44.25,
387
  "learning_rate": 0.0001751111111111111,
388
+ "loss": 0.743,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 44.25,
393
+ "eval_loss": 0.6749109625816345,
394
+ "eval_runtime": 50.7919,
395
+ "eval_samples_per_second": 33.529,
396
+ "eval_steps_per_second": 1.063,
397
+ "eval_wer": 0.5011981566820276,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 45.13,
402
  "learning_rate": 0.0001723333333333333,
403
+ "loss": 0.6921,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 46.02,
408
  "learning_rate": 0.00016955555555555555,
409
+ "loss": 0.7032,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 46.9,
414
  "learning_rate": 0.00016677777777777776,
415
+ "loss": 0.688,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 47.79,
420
  "learning_rate": 0.00016399999999999997,
421
+ "loss": 0.6842,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 48.67,
426
  "learning_rate": 0.00016122222222222221,
427
+ "loss": 0.6652,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 48.67,
432
+ "eval_loss": 0.6491430401802063,
433
+ "eval_runtime": 50.5445,
434
+ "eval_samples_per_second": 33.693,
435
+ "eval_steps_per_second": 1.068,
436
+ "eval_wer": 0.4735483870967742,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 49.56,
441
+ "learning_rate": 0.00015844444444444443,
442
+ "loss": 0.6798,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 50.44,
447
+ "learning_rate": 0.00015566666666666664,
448
+ "loss": 0.6639,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 51.33,
453
+ "learning_rate": 0.00015288888888888888,
454
+ "loss": 0.6691,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 52.21,
459
+ "learning_rate": 0.0001501111111111111,
460
+ "loss": 0.6275,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 53.1,
465
+ "learning_rate": 0.00014733333333333333,
466
+ "loss": 0.6386,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 53.1,
471
+ "eval_loss": 0.6927830576896667,
472
+ "eval_runtime": 50.5958,
473
+ "eval_samples_per_second": 33.659,
474
+ "eval_steps_per_second": 1.067,
475
+ "eval_wer": 0.49539170506912444,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 53.98,
480
+ "learning_rate": 0.00014455555555555554,
481
+ "loss": 0.6288,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 54.87,
486
+ "learning_rate": 0.00014177777777777778,
487
+ "loss": 0.6247,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 55.75,
492
+ "learning_rate": 0.00013902777777777777,
493
+ "loss": 0.6245,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 56.64,
498
+ "learning_rate": 0.00013624999999999998,
499
+ "loss": 0.606,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 57.52,
504
+ "learning_rate": 0.00013347222222222222,
505
+ "loss": 0.5945,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 57.52,
510
+ "eval_loss": 0.6358979344367981,
511
+ "eval_runtime": 51.0247,
512
+ "eval_samples_per_second": 33.376,
513
+ "eval_steps_per_second": 1.058,
514
+ "eval_wer": 0.479815668202765,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 58.41,
519
  "learning_rate": 0.0001307222222222222,
520
+ "loss": 0.6067,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 59.29,
525
  "learning_rate": 0.00012794444444444442,
526
+ "loss": 0.5917,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 60.18,
531
  "learning_rate": 0.00012516666666666666,
532
+ "loss": 0.5729,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 61.06,
537
  "learning_rate": 0.0001223888888888889,
538
+ "loss": 0.5759,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 61.95,
543
  "learning_rate": 0.0001196111111111111,
544
+ "loss": 0.5561,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 61.95,
549
+ "eval_loss": 0.6409346461296082,
550
+ "eval_runtime": 51.2819,
551
+ "eval_samples_per_second": 33.209,
552
+ "eval_steps_per_second": 1.053,
553
+ "eval_wer": 0.4799078341013825,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 62.83,
558
  "learning_rate": 0.00011683333333333332,
559
+ "loss": 0.5496,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 63.72,
564
  "learning_rate": 0.00011405555555555554,
565
+ "loss": 0.5541,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 64.6,
570
  "learning_rate": 0.00011127777777777777,
571
+ "loss": 0.555,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 65.49,
576
  "learning_rate": 0.0001085,
577
+ "loss": 0.5393,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 66.37,
582
  "learning_rate": 0.0001057222222222222,
583
+ "loss": 0.5464,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 66.37,
588
+ "eval_loss": 0.6451661586761475,
589
+ "eval_runtime": 50.5882,
590
+ "eval_samples_per_second": 33.664,
591
+ "eval_steps_per_second": 1.067,
592
+ "eval_wer": 0.46912442396313364,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 67.26,
597
  "learning_rate": 0.00010294444444444443,
598
+ "loss": 0.5308,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 68.14,
603
  "learning_rate": 0.00010016666666666666,
604
+ "loss": 0.5304,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 69.03,
609
  "learning_rate": 9.738888888888888e-05,
610
+ "loss": 0.5278,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 69.91,
615
  "learning_rate": 9.46111111111111e-05,
616
+ "loss": 0.5112,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 70.8,
621
  "learning_rate": 9.183333333333332e-05,
622
+ "loss": 0.5119,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 70.8,
627
+ "eval_loss": 0.6376333832740784,
628
+ "eval_runtime": 50.3564,
629
+ "eval_samples_per_second": 33.819,
630
+ "eval_steps_per_second": 1.072,
631
+ "eval_wer": 0.4657142857142857,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 71.68,
636
  "learning_rate": 8.905555555555555e-05,
637
+ "loss": 0.4974,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 72.57,
642
  "learning_rate": 8.627777777777776e-05,
643
+ "loss": 0.4986,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 73.45,
648
  "learning_rate": 8.349999999999998e-05,
649
+ "loss": 0.4752,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 74.34,
654
  "learning_rate": 8.072222222222222e-05,
655
+ "loss": 0.4947,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 75.22,
660
+ "learning_rate": 7.797222222222222e-05,
661
+ "loss": 0.474,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 75.22,
666
+ "eval_loss": 0.6540603041648865,
667
+ "eval_runtime": 50.3966,
668
+ "eval_samples_per_second": 33.792,
669
+ "eval_steps_per_second": 1.072,
670
+ "eval_wer": 0.46995391705069123,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 76.11,
675
+ "learning_rate": 7.519444444444445e-05,
676
+ "loss": 0.4724,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 76.99,
681
+ "learning_rate": 7.241666666666666e-05,
682
+ "loss": 0.4668,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 77.88,
687
+ "learning_rate": 6.963888888888889e-05,
688
+ "loss": 0.46,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 78.76,
693
+ "learning_rate": 6.68611111111111e-05,
694
+ "loss": 0.45,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 79.65,
699
+ "learning_rate": 6.408333333333332e-05,
700
+ "loss": 0.45,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 79.65,
705
+ "eval_loss": 0.637355625629425,
706
+ "eval_runtime": 50.4224,
707
+ "eval_samples_per_second": 33.775,
708
+ "eval_steps_per_second": 1.071,
709
+ "eval_wer": 0.45714285714285713,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 80.53,
714
  "learning_rate": 6.130555555555555e-05,
715
+ "loss": 0.4282,
716
  "step": 9100
717
  },
718
  {
719
  "epoch": 81.42,
720
  "learning_rate": 5.8527777777777774e-05,
721
+ "loss": 0.4455,
722
  "step": 9200
723
  },
724
  {
725
  "epoch": 82.3,
726
  "learning_rate": 5.574999999999999e-05,
727
+ "loss": 0.4423,
728
  "step": 9300
729
  },
730
  {
731
  "epoch": 83.19,
732
  "learning_rate": 5.297222222222222e-05,
733
+ "loss": 0.4262,
734
  "step": 9400
735
  },
736
  {
737
  "epoch": 84.07,
738
  "learning_rate": 5.019444444444444e-05,
739
+ "loss": 0.4315,
740
  "step": 9500
741
  },
742
  {
743
  "epoch": 84.07,
744
+ "eval_loss": 0.6568067073822021,
745
+ "eval_runtime": 50.7772,
746
+ "eval_samples_per_second": 33.539,
747
+ "eval_steps_per_second": 1.063,
748
+ "eval_wer": 0.46248847926267284,
749
  "step": 9500
750
  },
751
  {
752
  "epoch": 84.96,
753
  "learning_rate": 4.741666666666666e-05,
754
+ "loss": 0.4069,
755
  "step": 9600
756
  },
757
  {
758
  "epoch": 85.84,
759
  "learning_rate": 4.463888888888888e-05,
760
+ "loss": 0.416,
761
  "step": 9700
762
  },
763
  {
764
  "epoch": 86.73,
765
  "learning_rate": 4.186111111111111e-05,
766
+ "loss": 0.4106,
767
  "step": 9800
768
  },
769
  {
770
  "epoch": 87.61,
771
  "learning_rate": 3.9083333333333326e-05,
772
+ "loss": 0.4025,
773
  "step": 9900
774
  },
775
  {
776
  "epoch": 88.5,
777
  "learning_rate": 3.630555555555555e-05,
778
+ "loss": 0.3967,
779
  "step": 10000
780
  },
781
  {
782
  "epoch": 88.5,
783
+ "eval_loss": 0.6636261343955994,
784
+ "eval_runtime": 50.7873,
785
+ "eval_samples_per_second": 33.532,
786
+ "eval_steps_per_second": 1.063,
787
+ "eval_wer": 0.46046082949308753,
788
  "step": 10000
789
  },
790
  {
791
  "epoch": 89.38,
792
  "learning_rate": 3.352777777777777e-05,
793
+ "loss": 0.3857,
794
  "step": 10100
795
  },
796
  {
797
  "epoch": 90.27,
798
  "learning_rate": 3.0749999999999995e-05,
799
+ "loss": 0.3991,
800
  "step": 10200
801
  },
802
  {
803
  "epoch": 91.15,
804
  "learning_rate": 2.7972222222222217e-05,
805
+ "loss": 0.3899,
806
  "step": 10300
807
  },
808
  {
809
  "epoch": 92.04,
810
  "learning_rate": 2.519444444444444e-05,
811
+ "loss": 0.3965,
812
  "step": 10400
813
  },
814
  {
815
  "epoch": 92.92,
816
  "learning_rate": 2.2416666666666665e-05,
817
+ "loss": 0.3937,
818
  "step": 10500
819
  },
820
  {
821
  "epoch": 92.92,
822
+ "eval_loss": 0.6537252068519592,
823
+ "eval_runtime": 49.8207,
824
+ "eval_samples_per_second": 34.183,
825
+ "eval_steps_per_second": 1.084,
826
+ "eval_wer": 0.4597235023041475,
827
  "step": 10500
828
  },
829
  {
830
  "epoch": 93.81,
831
  "learning_rate": 1.9638888888888887e-05,
832
+ "loss": 0.3808,
833
  "step": 10600
834
  },
835
  {
836
  "epoch": 94.69,
837
  "learning_rate": 1.686111111111111e-05,
838
+ "loss": 0.3701,
839
  "step": 10700
840
  },
841
  {
842
  "epoch": 95.58,
843
  "learning_rate": 1.4083333333333331e-05,
844
+ "loss": 0.3746,
845
  "step": 10800
846
  },
847
  {
848
  "epoch": 96.46,
849
  "learning_rate": 1.1305555555555553e-05,
850
+ "loss": 0.3647,
851
  "step": 10900
852
  },
853
  {
854
  "epoch": 97.35,
855
  "learning_rate": 8.527777777777777e-06,
856
+ "loss": 0.3788,
857
  "step": 11000
858
  },
859
  {
860
  "epoch": 97.35,
861
+ "eval_loss": 0.6614137887954712,
862
+ "eval_runtime": 50.2299,
863
+ "eval_samples_per_second": 33.904,
864
+ "eval_steps_per_second": 1.075,
865
+ "eval_wer": 0.45889400921658985,
866
  "step": 11000
867
  },
868
  {
869
  "epoch": 98.23,
870
  "learning_rate": 5.749999999999999e-06,
871
+ "loss": 0.3666,
872
  "step": 11100
873
  },
874
  {
875
  "epoch": 99.12,
876
+ "learning_rate": 2.972222222222222e-06,
877
+ "loss": 0.3701,
878
  "step": 11200
879
  },
880
  {
881
  "epoch": 100.0,
882
+ "learning_rate": 1.9444444444444442e-07,
883
+ "loss": 0.3755,
884
  "step": 11300
885
  },
886
  {
887
  "epoch": 100.0,
888
  "step": 11300,
889
+ "total_flos": 4.438624660507792e+19,
890
+ "train_loss": 0.9130245208740234,
891
+ "train_runtime": 17767.5681,
892
+ "train_samples_per_second": 20.267,
893
+ "train_steps_per_second": 0.636
894
  }
895
  ],
896
  "max_steps": 11300,
897
  "num_train_epochs": 100,
898
+ "total_flos": 4.438624660507792e+19,
899
  "trial_name": null,
900
  "trial_params": null
901
  }