Spaces:
Running
on
Zero
Running
on
Zero
# -*- coding: utf-8 -*- | |
# partly derived from an open-source resource provided by Papercup Technologies Limited | |
# Resource-Author: Marlene Staib | |
# Modified by Florian Lux, 2021 | |
# Further modified by Florian Lux, 2022 | |
""" | |
All phonemes in the IPA standard are supported.
Zero-width characters are generally not supported, and
neither are some other modifiers. Tone, stress and
lengthening are represented with placeholder dimensions;
however, they need to be set manually, since this conversion
from phonemes to features works on a character-by-character
basis. In a few cases, the place of articulation is
approximated because only one phoneme had such a
combination, which does not warrant a new dimension.
""" | |
def generate_feature_lookup():
    """
    Return the table that maps each supported symbol to its articulatory description.

    Keys are single-character strings. Non-speech markers map to a dict that only
    holds 'symbol_type'. Phonemes additionally carry 'vowel_consonant' and 'VUV',
    followed by either the three vowel features (frontness, openness, roundedness)
    or the two consonant features (place, manner). Nasalised vowels carry an extra
    'consonant_manner': 'nasal' entry on top of the vowel features.
    """

    def marker(kind):
        # non-speech symbols are described by their type only
        return {'symbol_type': kind}

    def vowel(frontness, openness, roundedness, nasal=False):
        # every vowel in this table is voiced
        description = {
            'symbol_type': 'phoneme',
            'vowel_consonant': 'vowel',
            'VUV': 'voiced',
            'vowel_frontness': frontness,
            'vowel_openness': openness,
            'vowel_roundedness': roundedness,
        }
        if nasal:
            # nasalisation re-uses the consonant manner dimension
            description['consonant_manner'] = 'nasal'
        return description

    def consonant(voicing, place, manner):
        return {
            'symbol_type': 'phoneme',
            'vowel_consonant': 'consonant',
            'VUV': voicing,
            'consonant_place': place,
            'consonant_manner': manner,
        }

    return {
        '~': marker('silence'),
        '#': marker('end of sentence'),
        '?': marker('questionmark'),
        '!': marker('exclamationmark'),
        '.': marker('fullstop'),
        ' ': marker('word-boundary'),
        'ɜ': vowel('central', 'open-mid', 'unrounded'),
        'ə': vowel('central', 'mid', 'unrounded'),
        'a': vowel('front', 'open', 'unrounded'),
        'ð': consonant('voiced', 'dental', 'fricative'),
        'ɛ': vowel('front', 'open-mid', 'unrounded'),
        'ɪ': vowel('front_central', 'close_close-mid', 'unrounded'),
        'ŋ': consonant('voiced', 'velar', 'nasal'),
        'ɔ': vowel('back', 'open-mid', 'rounded'),
        'ɒ': vowel('back', 'open', 'rounded'),
        'ɾ': consonant('voiced', 'alveolar', 'flap'),
        'ʃ': consonant('unvoiced', 'postalveolar', 'fricative'),
        'θ': consonant('unvoiced', 'dental', 'fricative'),
        # NOTE(review): the IPA chart describes this vowel as rounded; the value is
        # kept unchanged so existing feature vectors stay the same -- confirm intent
        'ʊ': vowel('central_back', 'close_close-mid', 'unrounded'),
        'ʌ': vowel('back', 'open-mid', 'unrounded'),
        'ʒ': consonant('voiced', 'postalveolar', 'fricative'),
        'æ': vowel('front', 'open-mid_open', 'unrounded'),
        'b': consonant('voiced', 'bilabial', 'plosive'),
        'ʔ': consonant('unvoiced', 'glottal', 'plosive'),
        'd': consonant('voiced', 'alveolar', 'plosive'),
        'e': vowel('front', 'close-mid', 'unrounded'),
        'f': consonant('unvoiced', 'labiodental', 'fricative'),
        'ɡ': consonant('voiced', 'velar', 'plosive'),
        'h': consonant('unvoiced', 'glottal', 'fricative'),
        'i': vowel('front', 'close', 'unrounded'),
        'j': consonant('voiced', 'palatal', 'approximant'),
        'k': consonant('unvoiced', 'velar', 'plosive'),
        'l': consonant('voiced', 'alveolar', 'lateral-approximant'),
        'm': consonant('voiced', 'bilabial', 'nasal'),
        'n': consonant('voiced', 'alveolar', 'nasal'),
        'ɳ': consonant('voiced', 'retroflex', 'nasal'),
        'o': vowel('back', 'close-mid', 'rounded'),
        'p': consonant('unvoiced', 'bilabial', 'plosive'),
        'ɹ': consonant('voiced', 'alveolar', 'approximant'),
        'r': consonant('voiced', 'alveolar', 'trill'),
        's': consonant('unvoiced', 'alveolar', 'fricative'),
        't': consonant('unvoiced', 'alveolar', 'plosive'),
        'u': vowel('back', 'close', 'rounded'),
        'v': consonant('voiced', 'labiodental', 'fricative'),
        'w': consonant('voiced', 'labial-velar', 'approximant'),
        'x': consonant('unvoiced', 'velar', 'fricative'),
        'z': consonant('voiced', 'alveolar', 'fricative'),
        'ʀ': consonant('voiced', 'uvular', 'trill'),
        'ø': vowel('front', 'close-mid', 'rounded'),
        'ç': consonant('unvoiced', 'palatal', 'fricative'),
        'ɐ': vowel('central', 'open', 'unrounded'),
        'œ': vowel('front', 'open-mid', 'rounded'),
        'y': vowel('front', 'close', 'rounded'),
        'ʏ': vowel('front_central', 'close_close-mid', 'rounded'),
        'ɑ': vowel('back', 'open', 'unrounded'),
        'c': consonant('unvoiced', 'palatal', 'plosive'),
        'ɲ': consonant('voiced', 'palatal', 'nasal'),
        'ɣ': consonant('voiced', 'velar', 'fricative'),
        'ʎ': consonant('voiced', 'palatal', 'lateral-approximant'),
        'β': consonant('voiced', 'bilabial', 'fricative'),
        'ʝ': consonant('voiced', 'palatal', 'fricative'),
        'ɟ': consonant('voiced', 'palatal', 'plosive'),
        'q': consonant('unvoiced', 'uvular', 'plosive'),
        'ɕ': consonant('unvoiced', 'alveolopalatal', 'fricative'),
        'ɭ': consonant('voiced', 'retroflex', 'lateral-approximant'),
        'ɵ': vowel('central', 'close-mid', 'rounded'),
        'ʑ': consonant('voiced', 'alveolopalatal', 'fricative'),
        'ʋ': consonant('voiced', 'labiodental', 'approximant'),
        'ʁ': consonant('voiced', 'uvular', 'fricative'),
        'ɨ': vowel('central', 'close', 'unrounded'),
        'ʂ': consonant('unvoiced', 'retroflex', 'fricative'),
        'ɓ': consonant('voiced', 'bilabial', 'implosive'),
        'ʙ': consonant('voiced', 'bilabial', 'vibrant'),
        'ɗ': consonant('voiced', 'dental', 'implosive'),
        'ɖ': consonant('voiced', 'retroflex', 'plosive'),
        'χ': consonant('unvoiced', 'uvular', 'fricative'),
        'ʛ': consonant('voiced', 'uvular', 'implosive'),
        'ʟ': consonant('voiced', 'velar', 'lateral-approximant'),
        'ɽ': consonant('voiced', 'retroflex', 'flap'),
        'ɢ': consonant('voiced', 'uvular', 'plosive'),
        'ɠ': consonant('voiced', 'velar', 'implosive'),
        'ǂ': consonant('unvoiced', 'alveolopalatal', 'click'),
        'ɦ': consonant('voiced', 'glottal', 'fricative'),
        'ǁ': consonant('unvoiced', 'alveolar', 'click'),
        # same description as i, plus nasal
        'ĩ': vowel('front', 'close', 'unrounded', nasal=True),
        'ʍ': consonant('unvoiced', 'labial-velar', 'fricative'),
        'ʕ': consonant('voiced', 'pharyngal', 'fricative'),
        # NOTE(review): the IPA chart describes this approximant as voiced; the value
        # is kept unchanged so existing feature vectors stay the same -- confirm intent
        'ɻ': consonant('unvoiced', 'retroflex', 'approximant'),
        'ʄ': consonant('voiced', 'palatal', 'implosive'),
        # same description as u, plus nasal
        'ũ': vowel('back', 'close', 'rounded', nasal=True),
        'ɤ': vowel('back', 'close-mid', 'unrounded'),
        'ɶ': vowel('front', 'open', 'rounded'),
        # same description as o, plus nasal
        'õ': vowel('back', 'close-mid', 'rounded', nasal=True),
        'ʡ': consonant('unvoiced', 'epiglottal', 'plosive'),
        'ʈ': consonant('unvoiced', 'retroflex', 'plosive'),
        'ʜ': consonant('unvoiced', 'epiglottal', 'fricative'),
        'ɱ': consonant('voiced', 'labiodental', 'nasal'),
        'ɯ': vowel('back', 'close', 'unrounded'),
        'ǀ': consonant('unvoiced', 'dental', 'click'),
        'ɸ': consonant('unvoiced', 'bilabial', 'fricative'),
        'ʘ': consonant('unvoiced', 'bilabial', 'click'),
        'ʐ': consonant('voiced', 'retroflex', 'fricative'),
        'ɰ': consonant('voiced', 'velar', 'approximant'),
        'ɘ': vowel('central', 'close-mid', 'unrounded'),
        'ħ': consonant('unvoiced', 'pharyngal', 'fricative'),
        'ɞ': vowel('central', 'open-mid', 'rounded'),
        'ʉ': vowel('central', 'close', 'rounded'),
        'ɴ': consonant('voiced', 'uvular', 'nasal'),
        'ʢ': consonant('voiced', 'epiglottal', 'fricative'),
        # NOTE(review): verify that this code point is the one the upstream
        # phonemizer actually emits for the labiodental flap
        'ѵ': consonant('voiced', 'labiodental', 'flap'),
        # looks deceivingly like an exclamation mark, but it's a different unicode entry
        'ǃ': consonant('unvoiced', 'postalveolar', 'click'),
    }  # REMEMBER to also add the phonemes added here to the ID lookup below as the new highest ID
def get_phone_to_id():
    """
    Return the symbol -> integer ID mapping used for the states of the CTC loss
    and of dijkstra/mas in the aligner.

    The IDs have to be consistent, so they are defined by the position of each
    symbol in a fixed string instead of being derived from the feature lookup.
    """
    ordered_symbols = "~#?!ǃ.ɜəaðɛɪŋɔɒɾʃθʊʌʒæbʔdefghijklmnɳopɡɹrstuvwxzʀøçɐœyʏɑcɲɣʎβʝɟqɕɭɵʑʋʁɨʂɓʙɗɖχʛʟɽɢɠǂɦǁĩʍʕɻʄũɤɶõʡʈʜɱɯǀɸʘʐɰɘħɞʉɴʢѵ"
    phone_to_id = {symbol: position for position, symbol in enumerate(ordered_symbols)}

    # Workaround for the aligner: punctuation marks influence their context in
    # different ways, yet in the signal they are typically realised as plain
    # silence. Because they all share that realisation, the CTC objective does not
    # work for alignment search, so every punctuation mark is folded into silence.
    silence_id = phone_to_id["~"]
    for punctuation_mark in "#?!.":
        phone_to_id[punctuation_mark] = silence_id
    return phone_to_id
def get_feature_to_index_lookup():
    """
    Return the mapping from feature value name to its position in a feature vector.

    The position of a name inside the tuple below IS its index (0 to 63), so
    new entries must only ever be appended in a way that keeps existing
    positions unchanged.
    """
    ordered_feature_values = (
        # MODIFIER
        # -- stress: modified by the previous symbol
        "stressed",  # 0
        # -- tone: modified by the following symbol
        "very-high-tone",
        "high-tone",
        "mid-tone",
        "low-tone",
        "very-low-tone",
        "rising-tone",
        "falling-tone",
        "peaking-tone",
        "dipping-tone",  # 9
        # -- lengthening: modified by the following symbol
        "lengthened",
        "half-length",
        "shortened",  # 12
        # CATEGORIES
        "consonant",
        "vowel",
        "phoneme",  # 15
        # NON-SPEECH-MARKERS
        "silence",
        "end of sentence",
        "questionmark",
        "exclamationmark",
        "fullstop",
        "word-boundary",  # 21
        # PLACE
        "dental",
        "postalveolar",
        "velar",
        "palatal",
        "glottal",
        "uvular",
        "labiodental",
        "labial-velar",
        "alveolar",
        "bilabial",
        "alveolopalatal",
        "retroflex",
        "pharyngal",
        "epiglottal",  # 35
        # TONGUE POSITION
        "central",
        "back",
        "front_central",
        "front",
        "central_back",  # 40
        # MOUTH OPENNESS
        "mid",
        "close-mid",
        "close",
        "open-mid",
        "close_close-mid",
        "open-mid_open",
        "open",  # 47
        # MOUTH SHAPE
        "rounded",
        "unrounded",  # 49
        # MANNER
        "plosive",
        "nasal",
        "approximant",
        "trill",
        "flap",
        "fricative",
        "lateral-approximant",
        "implosive",
        "vibrant",
        "click",
        "ejective",  # 60
        # TYPE
        "aspirated",
        "unvoiced",
        "voiced",  # 63
    )
    return {name: position for position, name in enumerate(ordered_feature_values)}
def generate_feature_table():
    """
    Build the binary articulatory feature vector of every single-character symbol.

    Returns:
        A dict that maps each one-character key of generate_feature_lookup() to
        a list of 0/1 integers. Position i of the list is 1 if the symbol has
        the feature value whose index in get_feature_to_index_lookup() is i.
        Symbols that are not phonemes additionally get the "silence" position
        set. Positions of modifier features (stress, tone, lengthening, ...)
        are always 0 here; they have to be set by the caller.

    Feature values that are missing from the index table cannot be encoded.
    They are skipped in the vectors and reported once on stdout.
    """
    ipa_to_phonemefeats = generate_feature_lookup()
    value_to_index = get_feature_to_index_lookup()

    # The vector needs exactly one slot per indexed feature value. Deriving the
    # size from the index table (instead of a hard-coded modifier count plus the
    # number of values seen in the lexical table) keeps both in sync when
    # either table is extended.
    vector_size = max(value_to_index.values()) + 1
    phoneme_position = value_to_index["phoneme"]
    silence_position = value_to_index["silence"]

    phone_to_vector = dict()
    unknown_values = list()
    for ipa, feats in ipa_to_phonemefeats.items():
        if len(ipa) != 1:
            # the conversion works on a character by character basis
            continue
        vector = [0] * vector_size
        for value in feats.values():
            if value in value_to_index:
                vector[value_to_index[value]] = 1
            elif value not in unknown_values:
                unknown_values.append(value)
        if vector[phoneme_position] != 1:
            # it's not a phoneme, so we give it the silence marker, regardless of what it is.
            vector[silence_position] = 1
        phone_to_vector[ipa] = vector

    for value in unknown_values:
        print(f"Unknown feature value in featureset! {value}")

    return phone_to_vector
if __name__ == '__main__':
    # running the module directly dumps the complete symbol -> feature vector table
    feature_table = generate_feature_table()
    print(feature_table)