NbAiLab
/

wav2vec2-large-voxrex-npsc-nst-bokmaal-fixed

Automatic Speech Recognition

Generated from Trainer

Inference Endpoints

Model card · Files and versions · Community

versae committed on Aug 4, 2022

Commit

b2588d0

•

1 Parent(s): 344dea8

Update eval.py

Files changed (1) hide show

eval.py +5 -5

eval.py CHANGED Viewed

@@ -57,7 +57,7 @@ def normalize_text(text: str, dataset: str) -> str:
     if dataset.lower().endswith("nst"):
         text = text.lower()
-        text = text.replace("(...Vær stille under dette opptaket...)", "")
         text = re.sub('[áàâ]', 'a', text)
         text = re.sub('[ä]', 'æ', text)
         text = re.sub('[éèëê]', 'e', text)
@@ -78,10 +78,10 @@ def normalize_text(text: str, dataset: str) -> str:
         text = re.sub('[ç]', 'c', text)
         text = re.sub('[úùüû]', 'u', text)
         text = re.sub('\s', ' ', text)
-    text = re.sub("<ee(eh)?>", "e", text)
-    text = re.sub("<mmm?>", "m", text)
-    text = re.sub("<qq>", "q", text)
-    text = re.sub("<inaudible>", "i", text)
     # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # # note that order is important here!

     if dataset.lower().endswith("nst"):
         text = text.lower()
+        text = text.replace("(...vær stille under dette opptaket...)", "")
         text = re.sub('[áàâ]', 'a', text)
         text = re.sub('[ä]', 'æ', text)
         text = re.sub('[éèëê]', 'e', text)
         text = re.sub('[ç]', 'c', text)
         text = re.sub('[úùüû]', 'u', text)
         text = re.sub('\s', ' ', text)
+    text = re.sub("<ee(eh)?>", "ĥ", text)
+    text = re.sub("<mmm?>", "ĥ", text)
+    text = re.sub("<qq>", "ĥ", text)
+    text = re.sub("<inaudible>", "ĥ", text)
     # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # # note that order is important here!