ceyda committed

Commit: 3dee02c
Parent(s): ab8048e

fix attention flag

Files changed:
- README.md +7 -3
- preprocessor_config.json +1 -1
- pytorch_model.bin +1 -1
- tokenizer_config.json +1 -1
- vocab.json +1 -39
README.md CHANGED

@@ -23,7 +23,7 @@ model-index:
     metrics:
     - name: Test WER
       type: wer
-      value:
+      value: 22.60
 ---

 # Wav2Vec2-Base-760-Turkish

@@ -102,11 +102,13 @@ test_dataset = test_dataset.map(speech_file_to_array_fn)

 # Preprocessing the datasets.
 # We need to read the audio files as arrays
+
+# Attention mask is not used because the base model was not trained with it. Reference: https://github.com/huggingface/transformers/blob/403d530eec105c0e229fc2b754afdf77a4439def/src/transformers/models/wav2vec2/tokenization_wav2vec2.py#L305
 def evaluate(batch):
     inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)

     with torch.no_grad():
-        logits = model(inputs.input_values.to("cuda")
+        logits = model(inputs.input_values.to("cuda")).logits

     pred_ids = torch.argmax(logits, dim=-1)
     batch["pred_strings"] = processor.batch_decode(pred_ids, skip_special_tokens=True)

@@ -117,7 +119,9 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```

-**Test
+**Test Results**:
+- WER: 22.602390
+- CER: 6.054137


 ## Training
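For reference, a minimal runnable sketch of the corrected evaluation step, assembled from the snippet above. The repo id `ceyda/wav2vec2-base-760-turkish` is an assumption inferred from the card title, not confirmed by the diff:

```python
# Sketch of the evaluation step as fixed by this commit (assumptions noted).
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

MODEL_ID = "ceyda/wav2vec2-base-760-turkish"  # assumed repo id, inferred from the card title

processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID).to("cuda")

def evaluate(batch):
    # Per the comment added in this commit, no attention mask is passed:
    # the base model was not trained with one.
    inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
    with torch.no_grad():
        # The fix: the old line was missing a closing parenthesis and never
        # unpacked the model output; model(...) returns a CTC output object,
        # so the logits tensor must be read from its .logits attribute.
        logits = model(inputs.input_values.to("cuda")).logits
    pred_ids = torch.argmax(logits, dim=-1)
    batch["pred_strings"] = processor.batch_decode(pred_ids, skip_special_tokens=True)
    return batch
```

With this in place, the `test_dataset.map(evaluate, batched=True, batch_size=8)` call from the surrounding snippet runs as intended.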
preprocessor_config.json CHANGED

@@ -2,7 +2,7 @@
   "do_normalize": true,
   "feature_size": 1,
   "padding_side": "right",
-  "padding_value": 0
+  "padding_value": 0,
   "return_attention_mask": true,
   "sampling_rate": 16000
 }
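The old file was missing the comma after `"padding_value": 0` and was therefore invalid JSON; a quick sketch to confirm the repaired file parses, assuming a local copy:

```python
import json

# The pre-commit file, missing the comma after "padding_value": 0,
# was invalid JSON and would raise json.JSONDecodeError here.
with open("preprocessor_config.json") as f:
    config = json.load(f)

# "return_attention_mask" is the flag the commit message refers to.
print(config["return_attention_mask"], config["padding_value"])
```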
pytorch_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:adeefd83b89a25212c0d6c74b43b28e367e54cc7fbce63599927f7bc6d2b8ae9
 size 377691502
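This is a Git LFS pointer, so the `oid` is the SHA-256 of the actual weights blob; a sketch for verifying a downloaded copy against it:

```python
import hashlib

# The Git LFS pointer stores the SHA-256 of the real file;
# compare it against a locally downloaded pytorch_model.bin.
EXPECTED_OID = "adeefd83b89a25212c0d6c74b43b28e367e54cc7fbce63599927f7bc6d2b8ae9"

digest = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED_OID, "checksum mismatch"
```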
tokenizer_config.json CHANGED

@@ -1 +1 @@
-{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|"
+{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|"}
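The only change is the restored closing brace; without it the file is not valid JSON, as a small check shows:

```python
import json

fixed = ('{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", '
         '"pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|"}')

json.loads(fixed)            # parses with the closing brace restored

try:
    json.loads(fixed[:-1])   # the pre-commit version, missing the brace
except json.JSONDecodeError as err:
    print("old file fails to parse:", err)
```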
vocab.json CHANGED

@@ -1,39 +1 @@
-{"|": 4,
-"p": 5,
-"i": 6,
-"r": 7,
-"n": 8,
-"s": 9,
-"ö": 10,
-"z": 11,
-"l": 12,
-"e": 13,
-"h": 14,
-"â": 15,
-"y": 16,
-"a": 17,
-"k": 18,
-"ı": 19,
-"o": 20,
-"m": 21,
-"ü": 22,
-"g": 23,
-"c": 24,
-"b": 25,
-"ş": 26,
-"d": 27,
-"u": 28,
-"t": 29,
-"ç": 30,
-"ğ": 31,
-"v": 32,
-"f": 33,
-"j": 34,
-"x": 35,
-"w": 36,
-"q": 37,
-"î": 38,
-"<s>": 0,
-"<pad>": 1,
-"</s>": 2,
-"<unk>": 3}
+{"<s>": 0, "<pad>": 1, "</s>": 2, "<unk>": 3, "|": 4, "p": 5, "i": 6, "r": 7, "n": 8, "s": 9, "ö": 10, "z": 11, "l": 12, "e": 13, "h": 14, "â": 15, "y": 16, "a": 17, "k": 18, "ı": 19, "o": 20, "m": 21, "ü": 22, "g": 23, "c": 24, "b": 25, "ş": 26, "d": 27, "u": 28, "t": 29, "ç": 30, "ğ": 31, "v": 32, "f": 33, "j": 34, "x": 35, "w": 36, "q": 37, "î": 38}