Update eval.py
Browse files
eval.py
CHANGED
@@ -57,7 +57,7 @@ def normalize_text(text: str, dataset: str) -> str:
|
|
57 |
|
58 |
if dataset.lower().endswith("nst"):
|
59 |
text = text.lower()
|
60 |
-
text = text.replace("(...
|
61 |
text = re.sub('[áàâ]', 'a', text)
|
62 |
text = re.sub('[ä]', 'æ', text)
|
63 |
text = re.sub('[éèëê]', 'e', text)
|
@@ -78,10 +78,10 @@ def normalize_text(text: str, dataset: str) -> str:
|
|
78 |
text = re.sub('[ç]', 'c', text)
|
79 |
text = re.sub('[úùüû]', 'u', text)
|
80 |
text = re.sub('\s', ' ', text)
|
81 |
-
text = re.sub("<ee(eh)?>", "
|
82 |
-
text = re.sub("<mmm?>", "
|
83 |
-
text = re.sub("<qq>", "
|
84 |
-
text = re.sub("<inaudible>", "
|
85 |
|
86 |
# # In addition, we can normalize the target text, e.g. removing newline characters, etc.
|
87 |
# # note that order is important here!
|
|
|
57 |
|
58 |
if dataset.lower().endswith("nst"):
|
59 |
text = text.lower()
|
60 |
+
text = text.replace("(...vær stille under dette opptaket...)", "")
|
61 |
text = re.sub('[áàâ]', 'a', text)
|
62 |
text = re.sub('[ä]', 'æ', text)
|
63 |
text = re.sub('[éèëê]', 'e', text)
|
|
|
78 |
text = re.sub('[ç]', 'c', text)
|
79 |
text = re.sub('[úùüû]', 'u', text)
|
80 |
text = re.sub('\s', ' ', text)
|
81 |
+
text = re.sub("<ee(eh)?>", "ĥ", text)
|
82 |
+
text = re.sub("<mmm?>", "ĥ", text)
|
83 |
+
text = re.sub("<qq>", "ĥ", text)
|
84 |
+
text = re.sub("<inaudible>", "ĥ", text)
|
85 |
|
86 |
# # In addition, we can normalize the target text, e.g. removing newline characters, etc.
|
87 |
# # note that order is important here!
|