bmwrks committed on
Commit 15f2a0d
1 Parent(s): e8742c8

End of training

Files changed (5)
  1. README.md +87 -0
  2. config.json +486 -0
  3. model.safetensors +3 -0
  4. preprocessor_config.json +9 -0
  5. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,87 @@
+ ---
+ license: apache-2.0
+ base_model: facebook/wav2vec2-base
+ tags:
+ - generated_from_trainer
+ datasets:
+ - audiofolder
+ metrics:
+ - accuracy
+ model-index:
+ - name: my_birdcall_model
+   results:
+   - task:
+       name: Audio Classification
+       type: audio-classification
+     dataset:
+       name: audiofolder
+       type: audiofolder
+       config: rb
+       split: train[:5000]
+       args: rb
+     metrics:
+     - name: Accuracy
+       type: accuracy
+       value: 0.26
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # my_birdcall_model
+
+ This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the audiofolder dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 3.1584
+ - Accuracy: 0.26
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 3e-05
+ - train_batch_size: 32
+ - eval_batch_size: 32
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 128
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 10
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
+ | 4.9434        | 0.99  | 31   | 4.8016          | 0.108    |
+ | 4.5209        | 1.98  | 62   | 4.3832          | 0.108    |
+ | 4.1573        | 2.98  | 93   | 3.9995          | 0.108    |
+ | 3.8211        | 4.0   | 125  | 3.6762          | 0.108    |
+ | 3.5876        | 4.99  | 156  | 3.4586          | 0.152    |
+ | 3.4453        | 5.98  | 187  | 3.3284          | 0.191    |
+ | 3.313         | 6.98  | 218  | 3.2432          | 0.21     |
+ | 3.2369        | 8.0   | 250  | 3.1993          | 0.223    |
+ | 3.2286        | 8.99  | 281  | 3.1712          | 0.23     |
+ | 3.1867        | 9.92  | 310  | 3.1584          | 0.26     |
+
+
+ ### Framework versions
+
+ - Transformers 4.38.0
+ - Pytorch 2.3.0+cu121
+ - Datasets 2.15.0
+ - Tokenizers 0.15.0
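
The model card above documents training but stops short of showing inference. As a minimal sketch only, assuming the checkpoint is published under the repo id `bmwrks/my_birdcall_model` (inferred from the commit author and model name, not confirmed by the card) and using a placeholder file `example_clip.wav`, the fine-tuned classifier could be loaded with the Transformers audio-classification pipeline:

```python
from transformers import pipeline

# Assumed repo id; adjust to wherever this checkpoint was actually pushed.
classifier = pipeline("audio-classification", model="bmwrks/my_birdcall_model")

# The pipeline decodes the clip at the feature extractor's 16 kHz sampling rate
# and returns the top-k labels with their softmax scores.
predictions = classifier("example_clip.wav", top_k=5)
for p in predictions:
    print(f"{p['label']}: {p['score']:.3f}")
```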
config.json ADDED
@@ -0,0 +1,486 @@
+ {
+   "_name_or_path": "facebook/wav2vec2-base",
+   "activation_dropout": 0.0,
+   "adapter_attn_dim": null,
+   "adapter_kernel_size": 3,
+   "adapter_stride": 2,
+   "add_adapter": false,
+   "apply_spec_augment": true,
+   "architectures": [
+     "Wav2Vec2ForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 1,
+   "classifier_proj_size": 256,
+   "codevector_dim": 256,
+   "contrastive_logits_temperature": 0.1,
+   "conv_bias": false,
+   "conv_dim": [
+     512,
+     512,
+     512,
+     512,
+     512,
+     512,
+     512
+   ],
+   "conv_kernel": [
+     10,
+     3,
+     3,
+     3,
+     3,
+     2,
+     2
+   ],
+   "conv_stride": [
+     5,
+     2,
+     2,
+     2,
+     2,
+     2,
+     2
+   ],
+   "ctc_loss_reduction": "sum",
+   "ctc_zero_infinity": false,
+   "diversity_loss_weight": 0.1,
+   "do_stable_layer_norm": false,
+   "eos_token_id": 2,
+   "feat_extract_activation": "gelu",
+   "feat_extract_norm": "group",
+   "feat_proj_dropout": 0.1,
+   "feat_quantizer_dropout": 0.0,
+   "final_dropout": 0.0,
+   "freeze_feat_extract_train": true,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "asbfly",
+     "1": "ashdro1",
+     "10": "bcnher",
+     "100": "labcro1",
+     "101": "laudov1",
+     "102": "lblwar1",
+     "103": "lesyel1",
+     "104": "lewduc1",
+     "105": "lirplo",
+     "106": "litegr",
+     "107": "litgre1",
+     "108": "litspi1",
+     "109": "litswi1",
+     "11": "bkcbul1",
+     "110": "lobsun2",
+     "111": "maghor2",
+     "112": "malpar1",
+     "113": "maltro1",
+     "114": "malwoo1",
+     "115": "marsan",
+     "116": "mawthr1",
+     "117": "moipig1",
+     "118": "nilfly2",
+     "119": "niwpig1",
+     "12": "bkrfla1",
+     "120": "nutman",
+     "121": "orihob2",
+     "122": "oripip1",
+     "123": "pabflo1",
+     "124": "paisto1",
+     "125": "piebus1",
+     "126": "piekin1",
+     "127": "placuc3",
+     "128": "plaflo1",
+     "129": "plapri1",
+     "13": "bkskit1",
+     "130": "plhpar1",
+     "131": "pomgrp2",
+     "132": "purher1",
+     "133": "pursun3",
+     "134": "pursun4",
+     "135": "purswa3",
+     "136": "putbab1",
+     "137": "redspu1",
+     "138": "rerswa1",
+     "139": "revbul",
+     "14": "bkwsti",
+     "140": "rewbul",
+     "141": "rewlap1",
+     "142": "rocpig",
+     "143": "rorpar",
+     "144": "rossta2",
+     "145": "rufbab3",
+     "146": "ruftre2",
+     "147": "rufwoo2",
+     "148": "rutfly6",
+     "149": "sbeowl1",
+     "15": "bladro1",
+     "150": "scamin3",
+     "151": "shikra1",
+     "152": "smamin1",
+     "153": "sohmyn1",
+     "154": "spepic1",
+     "155": "spodov",
+     "156": "spoowl1",
+     "157": "sqtbul1",
+     "158": "stbkin1",
+     "159": "sttwoo1",
+     "16": "blaeag1",
+     "160": "thbwar1",
+     "161": "tibfly3",
+     "162": "tilwar1",
+     "163": "vefnut1",
+     "164": "vehpar1",
+     "165": "wbbfly1",
+     "166": "wemhar1",
+     "167": "whbbul2",
+     "168": "whbsho3",
+     "169": "whbtre1",
+     "17": "blakit1",
+     "170": "whbwag1",
+     "171": "whbwat1",
+     "172": "whbwoo2",
+     "173": "whcbar1",
+     "174": "whiter2",
+     "175": "whrmun",
+     "176": "whtkin2",
+     "177": "woosan",
+     "178": "wynlau1",
+     "179": "yebbab1",
+     "18": "blhori1",
+     "180": "yebbul3",
+     "181": "zitcis1",
+     "19": "blnmon1",
+     "2": "ashpri1",
+     "20": "blrwar1",
+     "21": "bncwoo3",
+     "22": "brakit1",
+     "23": "brasta1",
+     "24": "brcful1",
+     "25": "brfowl1",
+     "26": "brnhao1",
+     "27": "brnshr",
+     "28": "brodro1",
+     "29": "brwjac1",
+     "3": "ashwoo2",
+     "30": "brwowl1",
+     "31": "btbeat1",
+     "32": "bwfshr1",
+     "33": "categr",
+     "34": "chbeat1",
+     "35": "cohcuc1",
+     "36": "comfla1",
+     "37": "comgre",
+     "38": "comior1",
+     "39": "comkin1",
+     "4": "asikoe2",
+     "40": "commoo3",
+     "41": "commyn",
+     "42": "compea",
+     "43": "comros",
+     "44": "comsan",
+     "45": "comtai1",
+     "46": "copbar1",
+     "47": "crbsun2",
+     "48": "cregos1",
+     "49": "crfbar1",
+     "5": "asiope1",
+     "50": "crseag1",
+     "51": "dafbab1",
+     "52": "darter2",
+     "53": "eaywag1",
+     "54": "emedov2",
+     "55": "eucdov",
+     "56": "eurbla2",
+     "57": "eurcoo",
+     "58": "forwag1",
+     "59": "gargan",
+     "6": "aspfly1",
+     "60": "gloibi",
+     "61": "goflea1",
+     "62": "graher1",
+     "63": "grbeat1",
+     "64": "grecou1",
+     "65": "greegr",
+     "66": "grefla1",
+     "67": "grehor1",
+     "68": "grejun2",
+     "69": "grenig1",
+     "7": "aspswi1",
+     "70": "grewar3",
+     "71": "grnsan",
+     "72": "grnwar1",
+     "73": "grtdro1",
+     "74": "gryfra",
+     "75": "grynig2",
+     "76": "grywag",
+     "77": "gybpri1",
+     "78": "gyhcaf1",
+     "79": "heswoo1",
+     "8": "barfly1",
+     "80": "hoopoe",
+     "81": "houcro1",
+     "82": "houspa",
+     "83": "inbrob1",
+     "84": "indpit1",
+     "85": "indrob1",
+     "86": "indrol2",
+     "87": "indtit1",
+     "88": "ingori1",
+     "89": "inpher1",
+     "9": "barswa",
+     "90": "insbab1",
+     "91": "insowl1",
+     "92": "integr",
+     "93": "isbduc1",
+     "94": "jerbus2",
+     "95": "junbab2",
+     "96": "junmyn1",
+     "97": "junowl1",
+     "98": "kenplo1",
+     "99": "kerlau2"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "asbfly": "0",
+     "ashdro1": "1",
+     "ashpri1": "2",
+     "ashwoo2": "3",
+     "asikoe2": "4",
+     "asiope1": "5",
+     "aspfly1": "6",
+     "aspswi1": "7",
+     "barfly1": "8",
+     "barswa": "9",
+     "bcnher": "10",
+     "bkcbul1": "11",
+     "bkrfla1": "12",
+     "bkskit1": "13",
+     "bkwsti": "14",
+     "bladro1": "15",
+     "blaeag1": "16",
+     "blakit1": "17",
+     "blhori1": "18",
+     "blnmon1": "19",
+     "blrwar1": "20",
+     "bncwoo3": "21",
+     "brakit1": "22",
+     "brasta1": "23",
+     "brcful1": "24",
+     "brfowl1": "25",
+     "brnhao1": "26",
+     "brnshr": "27",
+     "brodro1": "28",
+     "brwjac1": "29",
+     "brwowl1": "30",
+     "btbeat1": "31",
+     "bwfshr1": "32",
+     "categr": "33",
+     "chbeat1": "34",
+     "cohcuc1": "35",
+     "comfla1": "36",
+     "comgre": "37",
+     "comior1": "38",
+     "comkin1": "39",
+     "commoo3": "40",
+     "commyn": "41",
+     "compea": "42",
+     "comros": "43",
+     "comsan": "44",
+     "comtai1": "45",
+     "copbar1": "46",
+     "crbsun2": "47",
+     "cregos1": "48",
+     "crfbar1": "49",
+     "crseag1": "50",
+     "dafbab1": "51",
+     "darter2": "52",
+     "eaywag1": "53",
+     "emedov2": "54",
+     "eucdov": "55",
+     "eurbla2": "56",
+     "eurcoo": "57",
+     "forwag1": "58",
+     "gargan": "59",
+     "gloibi": "60",
+     "goflea1": "61",
+     "graher1": "62",
+     "grbeat1": "63",
+     "grecou1": "64",
+     "greegr": "65",
+     "grefla1": "66",
+     "grehor1": "67",
+     "grejun2": "68",
+     "grenig1": "69",
+     "grewar3": "70",
+     "grnsan": "71",
+     "grnwar1": "72",
+     "grtdro1": "73",
+     "gryfra": "74",
+     "grynig2": "75",
+     "grywag": "76",
+     "gybpri1": "77",
+     "gyhcaf1": "78",
+     "heswoo1": "79",
+     "hoopoe": "80",
+     "houcro1": "81",
+     "houspa": "82",
+     "inbrob1": "83",
+     "indpit1": "84",
+     "indrob1": "85",
+     "indrol2": "86",
+     "indtit1": "87",
+     "ingori1": "88",
+     "inpher1": "89",
+     "insbab1": "90",
+     "insowl1": "91",
+     "integr": "92",
+     "isbduc1": "93",
+     "jerbus2": "94",
+     "junbab2": "95",
+     "junmyn1": "96",
+     "junowl1": "97",
+     "kenplo1": "98",
+     "kerlau2": "99",
+     "labcro1": "100",
+     "laudov1": "101",
+     "lblwar1": "102",
+     "lesyel1": "103",
+     "lewduc1": "104",
+     "lirplo": "105",
+     "litegr": "106",
+     "litgre1": "107",
+     "litspi1": "108",
+     "litswi1": "109",
+     "lobsun2": "110",
+     "maghor2": "111",
+     "malpar1": "112",
+     "maltro1": "113",
+     "malwoo1": "114",
+     "marsan": "115",
+     "mawthr1": "116",
+     "moipig1": "117",
+     "nilfly2": "118",
+     "niwpig1": "119",
+     "nutman": "120",
+     "orihob2": "121",
+     "oripip1": "122",
+     "pabflo1": "123",
+     "paisto1": "124",
+     "piebus1": "125",
+     "piekin1": "126",
+     "placuc3": "127",
+     "plaflo1": "128",
+     "plapri1": "129",
+     "plhpar1": "130",
+     "pomgrp2": "131",
+     "purher1": "132",
+     "pursun3": "133",
+     "pursun4": "134",
+     "purswa3": "135",
+     "putbab1": "136",
+     "redspu1": "137",
+     "rerswa1": "138",
+     "revbul": "139",
+     "rewbul": "140",
+     "rewlap1": "141",
+     "rocpig": "142",
+     "rorpar": "143",
+     "rossta2": "144",
+     "rufbab3": "145",
+     "ruftre2": "146",
+     "rufwoo2": "147",
+     "rutfly6": "148",
+     "sbeowl1": "149",
+     "scamin3": "150",
+     "shikra1": "151",
+     "smamin1": "152",
+     "sohmyn1": "153",
+     "spepic1": "154",
+     "spodov": "155",
+     "spoowl1": "156",
+     "sqtbul1": "157",
+     "stbkin1": "158",
+     "sttwoo1": "159",
+     "thbwar1": "160",
+     "tibfly3": "161",
+     "tilwar1": "162",
+     "vefnut1": "163",
+     "vehpar1": "164",
+     "wbbfly1": "165",
+     "wemhar1": "166",
+     "whbbul2": "167",
+     "whbsho3": "168",
+     "whbtre1": "169",
+     "whbwag1": "170",
+     "whbwat1": "171",
+     "whbwoo2": "172",
+     "whcbar1": "173",
+     "whiter2": "174",
+     "whrmun": "175",
+     "whtkin2": "176",
+     "woosan": "177",
+     "wynlau1": "178",
+     "yebbab1": "179",
+     "yebbul3": "180",
+     "zitcis1": "181"
+   },
+   "layer_norm_eps": 1e-05,
+   "layerdrop": 0.0,
+   "mask_channel_length": 10,
+   "mask_channel_min_space": 1,
+   "mask_channel_other": 0.0,
+   "mask_channel_prob": 0.0,
+   "mask_channel_selection": "static",
+   "mask_feature_length": 10,
+   "mask_feature_min_masks": 0,
+   "mask_feature_prob": 0.0,
+   "mask_time_length": 10,
+   "mask_time_min_masks": 2,
+   "mask_time_min_space": 1,
+   "mask_time_other": 0.0,
+   "mask_time_prob": 0.05,
+   "mask_time_selection": "static",
+   "model_type": "wav2vec2",
+   "no_mask_channel_overlap": false,
+   "no_mask_time_overlap": false,
+   "num_adapter_layers": 3,
+   "num_attention_heads": 12,
+   "num_codevector_groups": 2,
+   "num_codevectors_per_group": 320,
+   "num_conv_pos_embedding_groups": 16,
+   "num_conv_pos_embeddings": 128,
+   "num_feat_extract_layers": 7,
+   "num_hidden_layers": 12,
+   "num_negatives": 100,
+   "output_hidden_size": 768,
+   "pad_token_id": 0,
+   "proj_codevector_dim": 256,
+   "tdnn_dilation": [
+     1,
+     2,
+     3,
+     1,
+     1
+   ],
+   "tdnn_dim": [
+     512,
+     512,
+     512,
+     512,
+     1500
+   ],
+   "tdnn_kernel": [
+     5,
+     3,
+     3,
+     1,
+     1
+   ],
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.0",
+   "use_weighted_layer_sum": false,
+   "vocab_size": 32,
+   "xvector_output_dim": 512
+ }
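
The config above wires a 182-way classification head to short species codes through `id2label` and `label2id`. As a small sketch (again assuming the hypothetical repo id `bmwrks/my_birdcall_model`), the label mapping can be inspected straight from the hosted config without downloading the weights:

```python
from transformers import AutoConfig

# Assumed repo id; AutoConfig fetches only config.json.
config = AutoConfig.from_pretrained("bmwrks/my_birdcall_model")

print(config.num_labels)     # 182
print(config.id2label[0])    # "asbfly"
print(config.id2label[181])  # "zitcis1"
```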
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ebe357f31da38f797e9a4e9c33a601b9980c1601cf09fdf592e50c3d9ae0007
+ size 378487416
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "do_normalize": true,
+   "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+   "feature_size": 1,
+   "padding_side": "right",
+   "padding_value": 0.0,
+   "return_attention_mask": false,
+   "sampling_rate": 16000
+ }
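
The preprocessor config above expects mono float audio at 16 kHz, normalized, with no attention mask. A hedged sketch of a manual forward pass that mirrors those settings (placeholder silent waveform, assumed repo id `bmwrks/my_birdcall_model`):

```python
import numpy as np
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

repo_id = "bmwrks/my_birdcall_model"  # assumed repo id
extractor = Wav2Vec2FeatureExtractor.from_pretrained(repo_id)
model = Wav2Vec2ForSequenceClassification.from_pretrained(repo_id)
model.eval()

# Placeholder: one second of silence; real input should be mono audio resampled to 16 kHz.
waveform = np.zeros(16000, dtype=np.float32)

inputs = extractor(waveform, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape: (1, 182)

pred_id = int(logits.argmax(dim=-1))
print(model.config.id2label[pred_id])
```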
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b10aefa828c7cf7f4fdd8aa18ae0418820315f4879081cda7100fa1d23ac8cc7
+ size 4920