cloudwalkerw committed on
Commit
7d1d382
1 Parent(s): 867e7e2

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. all_results.json +12 -0
  3. eval_results.json +8 -0
  4. train_results.json +7 -0
  5. trainer_state.json +613 -0
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-base
4
  tags:
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -17,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.4227
21
  - Accuracy: 0.8974
22
 
23
  ## Model description
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-base
4
  tags:
5
+ - audio-classification
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
 
18
 
19
  This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.4228
22
  - Accuracy: 0.8974
23
 
24
  ## Model description
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.4227793216705322,
5
+ "eval_runtime": 491.8427,
6
+ "eval_samples_per_second": 50.512,
7
+ "eval_steps_per_second": 25.256,
8
+ "train_loss": 0.5994764679610127,
9
+ "train_runtime": 37020.076,
10
+ "train_samples_per_second": 6.856,
11
+ "train_steps_per_second": 0.107
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.8974400257607471,
4
+ "eval_loss": 0.4227793216705322,
5
+ "eval_runtime": 491.8427,
6
+ "eval_samples_per_second": 50.512,
7
+ "eval_steps_per_second": 25.256
8
+ }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "train_loss": 0.5994764679610127,
4
+ "train_runtime": 37020.076,
5
+ "train_samples_per_second": 6.856,
6
+ "train_steps_per_second": 0.107
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,613 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.026571398600935936,
3
+ "best_model_checkpoint": "/home/cloudwalker/ASVmodel/facebook_wav2vec2-base/checkpoint-100",
4
+ "epoch": 9.98109640831758,
5
+ "eval_steps": 100,
6
+ "global_step": 3960,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.25,
13
+ "learning_rate": 7.575757575757576e-05,
14
+ "loss": 0.352,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.25,
19
+ "eval_accuracy": 0.9960553856061826,
20
+ "eval_loss": 0.026571398600935936,
21
+ "eval_runtime": 488.9037,
22
+ "eval_samples_per_second": 50.816,
23
+ "eval_steps_per_second": 25.408,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.5,
28
+ "learning_rate": 0.00015151515151515152,
29
+ "loss": 0.2689,
30
+ "step": 200
31
+ },
32
+ {
33
+ "epoch": 0.5,
34
+ "eval_accuracy": 0.980800193205603,
35
+ "eval_loss": 0.21770231425762177,
36
+ "eval_runtime": 491.786,
37
+ "eval_samples_per_second": 50.518,
38
+ "eval_steps_per_second": 25.259,
39
+ "step": 200
40
+ },
41
+ {
42
+ "epoch": 0.76,
43
+ "learning_rate": 0.00022727272727272725,
44
+ "loss": 1.2723,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.76,
49
+ "eval_accuracy": 0.992432780550636,
50
+ "eval_loss": 0.035364724695682526,
51
+ "eval_runtime": 491.4276,
52
+ "eval_samples_per_second": 50.555,
53
+ "eval_steps_per_second": 25.277,
54
+ "step": 300
55
+ },
56
+ {
57
+ "epoch": 1.01,
58
+ "learning_rate": 0.00029966329966329963,
59
+ "loss": 0.6624,
60
+ "step": 400
61
+ },
62
+ {
63
+ "epoch": 1.01,
64
+ "eval_accuracy": 0.8974400257607471,
65
+ "eval_loss": 0.4243304133415222,
66
+ "eval_runtime": 491.3199,
67
+ "eval_samples_per_second": 50.566,
68
+ "eval_steps_per_second": 25.283,
69
+ "step": 400
70
+ },
71
+ {
72
+ "epoch": 1.26,
73
+ "learning_rate": 0.00029124579124579125,
74
+ "loss": 0.5959,
75
+ "step": 500
76
+ },
77
+ {
78
+ "epoch": 1.26,
79
+ "eval_accuracy": 0.8974400257607471,
80
+ "eval_loss": 0.4804568290710449,
81
+ "eval_runtime": 491.6703,
82
+ "eval_samples_per_second": 50.53,
83
+ "eval_steps_per_second": 25.265,
84
+ "step": 500
85
+ },
86
+ {
87
+ "epoch": 1.51,
88
+ "learning_rate": 0.0002828282828282828,
89
+ "loss": 0.594,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 1.51,
94
+ "eval_accuracy": 0.8974400257607471,
95
+ "eval_loss": 0.4400762617588043,
96
+ "eval_runtime": 491.6688,
97
+ "eval_samples_per_second": 50.53,
98
+ "eval_steps_per_second": 25.265,
99
+ "step": 600
100
+ },
101
+ {
102
+ "epoch": 1.76,
103
+ "learning_rate": 0.0002744107744107744,
104
+ "loss": 0.6017,
105
+ "step": 700
106
+ },
107
+ {
108
+ "epoch": 1.76,
109
+ "eval_accuracy": 0.8974400257607471,
110
+ "eval_loss": 0.4308442175388336,
111
+ "eval_runtime": 491.729,
112
+ "eval_samples_per_second": 50.524,
113
+ "eval_steps_per_second": 25.262,
114
+ "step": 700
115
+ },
116
+ {
117
+ "epoch": 2.02,
118
+ "learning_rate": 0.00026599326599326595,
119
+ "loss": 0.5973,
120
+ "step": 800
121
+ },
122
+ {
123
+ "epoch": 2.02,
124
+ "eval_accuracy": 0.8974400257607471,
125
+ "eval_loss": 0.39036858081817627,
126
+ "eval_runtime": 491.5847,
127
+ "eval_samples_per_second": 50.539,
128
+ "eval_steps_per_second": 25.269,
129
+ "step": 800
130
+ },
131
+ {
132
+ "epoch": 2.27,
133
+ "learning_rate": 0.00025757575757575756,
134
+ "loss": 0.6096,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 2.27,
139
+ "eval_accuracy": 0.8974400257607471,
140
+ "eval_loss": 0.4003813862800598,
141
+ "eval_runtime": 491.46,
142
+ "eval_samples_per_second": 50.551,
143
+ "eval_steps_per_second": 25.276,
144
+ "step": 900
145
+ },
146
+ {
147
+ "epoch": 2.52,
148
+ "learning_rate": 0.00024915824915824913,
149
+ "loss": 0.5936,
150
+ "step": 1000
151
+ },
152
+ {
153
+ "epoch": 2.52,
154
+ "eval_accuracy": 0.8974400257607471,
155
+ "eval_loss": 0.41795614361763,
156
+ "eval_runtime": 491.8429,
157
+ "eval_samples_per_second": 50.512,
158
+ "eval_steps_per_second": 25.256,
159
+ "step": 1000
160
+ },
161
+ {
162
+ "epoch": 2.77,
163
+ "learning_rate": 0.00024074074074074072,
164
+ "loss": 0.5932,
165
+ "step": 1100
166
+ },
167
+ {
168
+ "epoch": 2.77,
169
+ "eval_accuracy": 0.8974400257607471,
170
+ "eval_loss": 0.4600105881690979,
171
+ "eval_runtime": 491.5387,
172
+ "eval_samples_per_second": 50.543,
173
+ "eval_steps_per_second": 25.272,
174
+ "step": 1100
175
+ },
176
+ {
177
+ "epoch": 3.02,
178
+ "learning_rate": 0.0002323232323232323,
179
+ "loss": 0.5884,
180
+ "step": 1200
181
+ },
182
+ {
183
+ "epoch": 3.02,
184
+ "eval_accuracy": 0.8974400257607471,
185
+ "eval_loss": 0.4334673285484314,
186
+ "eval_runtime": 491.9313,
187
+ "eval_samples_per_second": 50.503,
188
+ "eval_steps_per_second": 25.251,
189
+ "step": 1200
190
+ },
191
+ {
192
+ "epoch": 3.28,
193
+ "learning_rate": 0.0002239057239057239,
194
+ "loss": 0.5815,
195
+ "step": 1300
196
+ },
197
+ {
198
+ "epoch": 3.28,
199
+ "eval_accuracy": 0.8974400257607471,
200
+ "eval_loss": 0.37108397483825684,
201
+ "eval_runtime": 491.7823,
202
+ "eval_samples_per_second": 50.518,
203
+ "eval_steps_per_second": 25.259,
204
+ "step": 1300
205
+ },
206
+ {
207
+ "epoch": 3.53,
208
+ "learning_rate": 0.00021548821548821544,
209
+ "loss": 0.5923,
210
+ "step": 1400
211
+ },
212
+ {
213
+ "epoch": 3.53,
214
+ "eval_accuracy": 0.8974400257607471,
215
+ "eval_loss": 0.4266449213027954,
216
+ "eval_runtime": 491.763,
217
+ "eval_samples_per_second": 50.52,
218
+ "eval_steps_per_second": 25.26,
219
+ "step": 1400
220
+ },
221
+ {
222
+ "epoch": 3.78,
223
+ "learning_rate": 0.00020707070707070703,
224
+ "loss": 0.6062,
225
+ "step": 1500
226
+ },
227
+ {
228
+ "epoch": 3.78,
229
+ "eval_accuracy": 0.8974400257607471,
230
+ "eval_loss": 0.4494142532348633,
231
+ "eval_runtime": 491.657,
232
+ "eval_samples_per_second": 50.531,
233
+ "eval_steps_per_second": 25.266,
234
+ "step": 1500
235
+ },
236
+ {
237
+ "epoch": 4.03,
238
+ "learning_rate": 0.00019865319865319862,
239
+ "loss": 0.6025,
240
+ "step": 1600
241
+ },
242
+ {
243
+ "epoch": 4.03,
244
+ "eval_accuracy": 0.8974400257607471,
245
+ "eval_loss": 0.40983960032463074,
246
+ "eval_runtime": 491.6314,
247
+ "eval_samples_per_second": 50.534,
248
+ "eval_steps_per_second": 25.267,
249
+ "step": 1600
250
+ },
251
+ {
252
+ "epoch": 4.28,
253
+ "learning_rate": 0.00019023569023569022,
254
+ "loss": 0.5907,
255
+ "step": 1700
256
+ },
257
+ {
258
+ "epoch": 4.28,
259
+ "eval_accuracy": 0.8974400257607471,
260
+ "eval_loss": 0.3796265125274658,
261
+ "eval_runtime": 491.8727,
262
+ "eval_samples_per_second": 50.509,
263
+ "eval_steps_per_second": 25.255,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 4.54,
268
+ "learning_rate": 0.0001818181818181818,
269
+ "loss": 0.5933,
270
+ "step": 1800
271
+ },
272
+ {
273
+ "epoch": 4.54,
274
+ "eval_accuracy": 0.8974400257607471,
275
+ "eval_loss": 0.41142529249191284,
276
+ "eval_runtime": 491.9136,
277
+ "eval_samples_per_second": 50.505,
278
+ "eval_steps_per_second": 25.252,
279
+ "step": 1800
280
+ },
281
+ {
282
+ "epoch": 4.79,
283
+ "learning_rate": 0.0001734006734006734,
284
+ "loss": 0.5997,
285
+ "step": 1900
286
+ },
287
+ {
288
+ "epoch": 4.79,
289
+ "eval_accuracy": 0.8974400257607471,
290
+ "eval_loss": 0.4283621311187744,
291
+ "eval_runtime": 492.0918,
292
+ "eval_samples_per_second": 50.487,
293
+ "eval_steps_per_second": 25.243,
294
+ "step": 1900
295
+ },
296
+ {
297
+ "epoch": 5.04,
298
+ "learning_rate": 0.000164983164983165,
299
+ "loss": 0.6028,
300
+ "step": 2000
301
+ },
302
+ {
303
+ "epoch": 5.04,
304
+ "eval_accuracy": 0.8974400257607471,
305
+ "eval_loss": 0.4269249737262726,
306
+ "eval_runtime": 492.0087,
307
+ "eval_samples_per_second": 50.495,
308
+ "eval_steps_per_second": 25.248,
309
+ "step": 2000
310
+ },
311
+ {
312
+ "epoch": 5.29,
313
+ "learning_rate": 0.00015656565656565653,
314
+ "loss": 0.5936,
315
+ "step": 2100
316
+ },
317
+ {
318
+ "epoch": 5.29,
319
+ "eval_accuracy": 0.8974400257607471,
320
+ "eval_loss": 0.4423413872718811,
321
+ "eval_runtime": 492.1387,
322
+ "eval_samples_per_second": 50.482,
323
+ "eval_steps_per_second": 25.241,
324
+ "step": 2100
325
+ },
326
+ {
327
+ "epoch": 5.55,
328
+ "learning_rate": 0.00014814814814814812,
329
+ "loss": 0.5994,
330
+ "step": 2200
331
+ },
332
+ {
333
+ "epoch": 5.55,
334
+ "eval_accuracy": 0.8974400257607471,
335
+ "eval_loss": 0.43968868255615234,
336
+ "eval_runtime": 491.824,
337
+ "eval_samples_per_second": 50.514,
338
+ "eval_steps_per_second": 25.257,
339
+ "step": 2200
340
+ },
341
+ {
342
+ "epoch": 5.8,
343
+ "learning_rate": 0.0001397306397306397,
344
+ "loss": 0.5937,
345
+ "step": 2300
346
+ },
347
+ {
348
+ "epoch": 5.8,
349
+ "eval_accuracy": 0.8974400257607471,
350
+ "eval_loss": 0.43047070503234863,
351
+ "eval_runtime": 492.196,
352
+ "eval_samples_per_second": 50.476,
353
+ "eval_steps_per_second": 25.238,
354
+ "step": 2300
355
+ },
356
+ {
357
+ "epoch": 6.05,
358
+ "learning_rate": 0.0001313131313131313,
359
+ "loss": 0.5958,
360
+ "step": 2400
361
+ },
362
+ {
363
+ "epoch": 6.05,
364
+ "eval_accuracy": 0.8974400257607471,
365
+ "eval_loss": 0.43379682302474976,
366
+ "eval_runtime": 492.4115,
367
+ "eval_samples_per_second": 50.454,
368
+ "eval_steps_per_second": 25.227,
369
+ "step": 2400
370
+ },
371
+ {
372
+ "epoch": 6.3,
373
+ "learning_rate": 0.0001228956228956229,
374
+ "loss": 0.5984,
375
+ "step": 2500
376
+ },
377
+ {
378
+ "epoch": 6.3,
379
+ "eval_accuracy": 0.8974400257607471,
380
+ "eval_loss": 0.394501656293869,
381
+ "eval_runtime": 491.7814,
382
+ "eval_samples_per_second": 50.518,
383
+ "eval_steps_per_second": 25.259,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 6.55,
388
+ "learning_rate": 0.00011447811447811446,
389
+ "loss": 0.5943,
390
+ "step": 2600
391
+ },
392
+ {
393
+ "epoch": 6.55,
394
+ "eval_accuracy": 0.8974400257607471,
395
+ "eval_loss": 0.38777557015419006,
396
+ "eval_runtime": 492.0653,
397
+ "eval_samples_per_second": 50.489,
398
+ "eval_steps_per_second": 25.245,
399
+ "step": 2600
400
+ },
401
+ {
402
+ "epoch": 6.81,
403
+ "learning_rate": 0.00010606060606060605,
404
+ "loss": 0.5819,
405
+ "step": 2700
406
+ },
407
+ {
408
+ "epoch": 6.81,
409
+ "eval_accuracy": 0.8974400257607471,
410
+ "eval_loss": 0.42347970604896545,
411
+ "eval_runtime": 491.9589,
412
+ "eval_samples_per_second": 50.5,
413
+ "eval_steps_per_second": 25.25,
414
+ "step": 2700
415
+ },
416
+ {
417
+ "epoch": 7.06,
418
+ "learning_rate": 9.764309764309764e-05,
419
+ "loss": 0.594,
420
+ "step": 2800
421
+ },
422
+ {
423
+ "epoch": 7.06,
424
+ "eval_accuracy": 0.8974400257607471,
425
+ "eval_loss": 0.4160361886024475,
426
+ "eval_runtime": 491.6691,
427
+ "eval_samples_per_second": 50.53,
428
+ "eval_steps_per_second": 25.265,
429
+ "step": 2800
430
+ },
431
+ {
432
+ "epoch": 7.31,
433
+ "learning_rate": 8.92255892255892e-05,
434
+ "loss": 0.5883,
435
+ "step": 2900
436
+ },
437
+ {
438
+ "epoch": 7.31,
439
+ "eval_accuracy": 0.8974400257607471,
440
+ "eval_loss": 0.4075636565685272,
441
+ "eval_runtime": 492.0942,
442
+ "eval_samples_per_second": 50.486,
443
+ "eval_steps_per_second": 25.243,
444
+ "step": 2900
445
+ },
446
+ {
447
+ "epoch": 7.56,
448
+ "learning_rate": 8.08080808080808e-05,
449
+ "loss": 0.5877,
450
+ "step": 3000
451
+ },
452
+ {
453
+ "epoch": 7.56,
454
+ "eval_accuracy": 0.8974400257607471,
455
+ "eval_loss": 0.4212785065174103,
456
+ "eval_runtime": 491.8,
457
+ "eval_samples_per_second": 50.516,
458
+ "eval_steps_per_second": 25.258,
459
+ "step": 3000
460
+ },
461
+ {
462
+ "epoch": 7.81,
463
+ "learning_rate": 7.239057239057239e-05,
464
+ "loss": 0.5939,
465
+ "step": 3100
466
+ },
467
+ {
468
+ "epoch": 7.81,
469
+ "eval_accuracy": 0.8974400257607471,
470
+ "eval_loss": 0.4088614583015442,
471
+ "eval_runtime": 492.134,
472
+ "eval_samples_per_second": 50.482,
473
+ "eval_steps_per_second": 25.241,
474
+ "step": 3100
475
+ },
476
+ {
477
+ "epoch": 8.07,
478
+ "learning_rate": 6.397306397306397e-05,
479
+ "loss": 0.6025,
480
+ "step": 3200
481
+ },
482
+ {
483
+ "epoch": 8.07,
484
+ "eval_accuracy": 0.8974400257607471,
485
+ "eval_loss": 0.4384858310222626,
486
+ "eval_runtime": 491.7955,
487
+ "eval_samples_per_second": 50.517,
488
+ "eval_steps_per_second": 25.258,
489
+ "step": 3200
490
+ },
491
+ {
492
+ "epoch": 8.32,
493
+ "learning_rate": 5.5555555555555545e-05,
494
+ "loss": 0.6016,
495
+ "step": 3300
496
+ },
497
+ {
498
+ "epoch": 8.32,
499
+ "eval_accuracy": 0.8974400257607471,
500
+ "eval_loss": 0.4373140335083008,
501
+ "eval_runtime": 491.7882,
502
+ "eval_samples_per_second": 50.518,
503
+ "eval_steps_per_second": 25.259,
504
+ "step": 3300
505
+ },
506
+ {
507
+ "epoch": 8.57,
508
+ "learning_rate": 4.7138047138047136e-05,
509
+ "loss": 0.5815,
510
+ "step": 3400
511
+ },
512
+ {
513
+ "epoch": 8.57,
514
+ "eval_accuracy": 0.8974400257607471,
515
+ "eval_loss": 0.4190850555896759,
516
+ "eval_runtime": 492.4127,
517
+ "eval_samples_per_second": 50.454,
518
+ "eval_steps_per_second": 25.227,
519
+ "step": 3400
520
+ },
521
+ {
522
+ "epoch": 8.82,
523
+ "learning_rate": 3.8720538720538714e-05,
524
+ "loss": 0.5915,
525
+ "step": 3500
526
+ },
527
+ {
528
+ "epoch": 8.82,
529
+ "eval_accuracy": 0.8974400257607471,
530
+ "eval_loss": 0.4215960204601288,
531
+ "eval_runtime": 491.8471,
532
+ "eval_samples_per_second": 50.512,
533
+ "eval_steps_per_second": 25.256,
534
+ "step": 3500
535
+ },
536
+ {
537
+ "epoch": 9.07,
538
+ "learning_rate": 3.03030303030303e-05,
539
+ "loss": 0.602,
540
+ "step": 3600
541
+ },
542
+ {
543
+ "epoch": 9.07,
544
+ "eval_accuracy": 0.8974400257607471,
545
+ "eval_loss": 0.4336757957935333,
546
+ "eval_runtime": 491.8986,
547
+ "eval_samples_per_second": 50.506,
548
+ "eval_steps_per_second": 25.253,
549
+ "step": 3600
550
+ },
551
+ {
552
+ "epoch": 9.33,
553
+ "learning_rate": 2.1885521885521884e-05,
554
+ "loss": 0.5907,
555
+ "step": 3700
556
+ },
557
+ {
558
+ "epoch": 9.33,
559
+ "eval_accuracy": 0.8974400257607471,
560
+ "eval_loss": 0.4128669798374176,
561
+ "eval_runtime": 491.7648,
562
+ "eval_samples_per_second": 50.52,
563
+ "eval_steps_per_second": 25.26,
564
+ "step": 3700
565
+ },
566
+ {
567
+ "epoch": 9.58,
568
+ "learning_rate": 1.3468013468013465e-05,
569
+ "loss": 0.603,
570
+ "step": 3800
571
+ },
572
+ {
573
+ "epoch": 9.58,
574
+ "eval_accuracy": 0.8974400257607471,
575
+ "eval_loss": 0.4216155707836151,
576
+ "eval_runtime": 491.5621,
577
+ "eval_samples_per_second": 50.541,
578
+ "eval_steps_per_second": 25.27,
579
+ "step": 3800
580
+ },
581
+ {
582
+ "epoch": 9.83,
583
+ "learning_rate": 5.05050505050505e-06,
584
+ "loss": 0.593,
585
+ "step": 3900
586
+ },
587
+ {
588
+ "epoch": 9.83,
589
+ "eval_accuracy": 0.8974400257607471,
590
+ "eval_loss": 0.42267537117004395,
591
+ "eval_runtime": 491.786,
592
+ "eval_samples_per_second": 50.518,
593
+ "eval_steps_per_second": 25.259,
594
+ "step": 3900
595
+ },
596
+ {
597
+ "epoch": 9.98,
598
+ "step": 3960,
599
+ "total_flos": 1.5053705916862923e+19,
600
+ "train_loss": 0.5994764679610127,
601
+ "train_runtime": 37020.076,
602
+ "train_samples_per_second": 6.856,
603
+ "train_steps_per_second": 0.107
604
+ }
605
+ ],
606
+ "logging_steps": 100,
607
+ "max_steps": 3960,
608
+ "num_train_epochs": 10,
609
+ "save_steps": 100,
610
+ "total_flos": 1.5053705916862923e+19,
611
+ "trial_name": null,
612
+ "trial_params": null
613
+ }