"""Miscellaneous phonological features found in typological databases.
"""
import textwrap
import collections
import typing
from cltoolkit.util import iter_syllables
from .reqs import requires, inventory, graphemes, inventory_with_occurrences
from . import util
class WithInventory(util.FeatureFunction):
    """
    Abstract base for feature callables that are computed from a sound inventory.

    Subclasses implement :meth:`run`; calling an instance with a language passes
    ``language.sound_inventory`` to that method and returns its result.
    """
    def __init__(self, *args, **kw):
        # Nothing to add here; all arguments are handed on to the base class.
        super().__init__(*args, **kw)

    def run(self, inv):
        """Compute the feature value for the inventory `inv` (implemented by subclasses)."""
        raise NotImplementedError()  # pragma: no cover

    @requires(inventory)
    def __call__(self, language):
        sound_inventory = language.sound_inventory
        return self.run(sound_inventory)
class InventoryQuery(WithInventory):
    """
    Compute the length/size of some attribute of a sound inventory.

    .. code-block:: python

        number_of_consonants = InventoryQuery('consonants')
    """
    def __init__(self, attr):
        """
        :param attr: Name of the inventory attribute whose items are counted.
        """
        super().__init__(attr)
        self.rtype = int
        self.attr = attr
        self.doc = f'Number of items of type {attr} in the inventory.'

    def run(self, inv):
        """Return the number of items stored under ``self.attr`` on `inv`."""
        items = getattr(inv, self.attr)
        return len(items)
class YesNoQuery(WithInventory):
    """
    Compute whether an inventory has some property.

    .. code-block:: python

        has_tones = YesNoQuery('tones')
    """
    def __init__(self, attr):
        """
        :param attr: Name of the inventory attribute that is checked for being non-empty.
        """
        super().__init__(attr)
        self.rtype = bool
        self.attr = attr
        self.doc = f'Does the inventory have {attr}?'

    def run(self, inv):
        """Return `True` if the attribute ``self.attr`` of `inv` holds at least one item."""
        items = getattr(inv, self.attr)
        return len(items) > 0
class Ratio(WithInventory):
    """
    Computes the ratio between sizes of two properties of an inventory.
    """
    def __init__(self, attr1, attr2):
        """
        :param attr1: Name of the inventory attribute counted for the numerator.
        :param attr2: Name of the inventory attribute counted for the denominator.
        """
        super().__init__(attr1, attr2)
        self.rtype = float
        self.attr1, self.attr2 = attr1, attr2
        self.doc = f'Ratio between {attr1} and {attr2} in the inventory'

    def run(self, inv):
        """
        Return ``len(attr1) / len(attr2)`` for the inventory `inv`.

        :raises ZeroDivisionError: if the second attribute is empty.
        """
        numerator = len(getattr(inv, self.attr1))
        denominator = len(getattr(inv, self.attr2))
        return numerator / denominator
class StartsWithSound(util.FeatureFunction):
    """
    Check if a language has a form for {} starting with {}.

    .. note::

        Parametrized instances of this class can be used to check for certain cases of sound
        symbolism, or geographic / areal trends in languages to have word forms for certain
        concepts starting in certain words.

        The first sentence of this docstring is a format template: its two placeholders are
        filled with the concept label and the sound label to build the instance's ``doc``.

    .. seealso:: :func:`sound_match`

    .. code-block:: python

        mother_with_m = StartsWithSound(["MOTHER"], [["bilabial", "nasal"]], sound_label='[m]')
    """
    def __init__(self,
                 concepts: typing.List[str],
                 features: typing.List[typing.List[str]],
                 concept_label: typing.Optional[str] = None,
                 sound_label: typing.Optional[str] = None):
        """
        :param concepts: List of Concepticon conceptset glosses specifying a (broad) concept.
        :param features: List of lists of phonological features to check initial sounds against.
        :param concept_label: Optional human-readable label for the concept(s); passed on to \
        `util.concept_label`.
        :param sound_label: Optional human-readable label for the sound; defaults to the string \
        representation of `features`.
        """
        super().__init__(concepts, features, concept_label=concept_label, sound_label=sound_label)
        self.concepts = concepts
        self.features = features
        concept_label = util.concept_label(concepts, label=concept_label)
        sound_label = sound_label or str(self.features)
        self.rtype = bool
        # Only the summary sentence is meant to end up in `doc`. The docstring marks the start
        # of the explanatory part with the reST directive ".. note::", so that is the marker to
        # cut at (splitting at 'Note:' never matched and left note and example in `doc`).
        self.doc = textwrap.dedent(
            self.__doc__.format(concept_label, sound_label)).split('.. note::')[0].strip()
        self.categories = {
            True: "{} starts with {} or similar".format(concept_label, sound_label),
            False: "{} starts with another sound".format(concept_label),
            None: "missing data",
        }

    @requires(graphemes)
    def __call__(self, language):
        """
        :return: `True` if any form of any of the concepts starts with a matching sound, \
        `False` if forms exist but none matches, `None` if the language has no form for any \
        of the concepts.
        """
        has_forms = False
        for concept in self.concepts:
            if concept in language.concepts:
                for form in language.concepts[concept].forms:
                    has_forms = True
                    # Only the first sound of the form is compared with the feature bundles.
                    if sound_match(form.sound_objects[0], self.features):
                        return True
        return False if has_forms else None
def sound_match(sound, features):
    """
    Match a sound by a subset of features.

    .. note::

        The major idea of this function is to allow for the convenient matching of
        some sounds by defining them in terms of a part of their features alone.
        E.g., [m] and its variants can be defined as ["bilabial", "nasal"], since
        we do not care about the rest of the features.

    :param sound: Object with a `featureset` attribute.
    :param features: Iterable of feature bundles (iterables of feature names).
    :return: `True` if all features of at least one bundle are in `sound.featureset`.
    """
    return any(set(bundle).issubset(sound.featureset) for bundle in features)
# vowel_sound_size = BaseInventoryQuery("vowel_sounds")
# consonant_sound_size = BaseInventoryQuery("consonant_sounds")
# has_tones =YesNoQuery("tones")
def is_voiced(sound):
    """
    Check if a sound is voiced or not.

    A sound counts as voiced if the phonation of `sound.obj` is "voiced", or if its
    `breathiness` or `voicing` attribute is set to a truthy value.
    """
    segment = sound.obj
    return bool(segment.phonation == "voiced" or segment.breathiness or segment.voicing)
def is_glide(sound):
    """Check if sound is a glide or a liquid, i.e. if its manner is trill, approximant or tap."""
    manner = sound.manner
    return manner in {"approximant", "tap", "trill"}
def is_implosive(sound):
    """
    Check if a sound is an implosive, i.e. if its manner is "implosive".
    """
    manner = sound.manner
    return manner in {"implosive"}
def stop_like(sound):
    """
    Check if a sound is a stop or an affricate.

    Both manners are treated as one group of sounds.
    """
    manner = sound.manner
    return manner in {"affricate", "stop"}
def is_uvular(sound):
    """
    Check if a sound is uvular or not, based on the place of articulation of `sound.obj`.
    """
    place = sound.obj.place
    return place == "uvular"
def is_ejective(sound):
    """
    Return the `ejection` attribute of `sound.obj`.

    The value is returned as is (not converted to `bool`); callers use its truthiness.
    """
    segment = sound.obj
    return segment.ejection
def is_nasal(sound):
    """Check if the manner of a sound is "nasal"."""
    manner = sound.manner
    return manner == "nasal"
def is_lateral(sound):
    """Check if the `airstream` attribute of a sound is "lateral"."""
    airstream = sound.airstream
    return airstream == "lateral"
class PlosiveFricativeVoicing(WithInventory):
    """
    Classify in which of the manners stop and fricative an inventory has voiced consonants.

    .. seealso:: `WALS 4A - Voicing in Plosives and Fricatives <https://wals.info/feature/4A>`_
    """
    categories = {
        1: "no voicing contrast",
        2: "in plosives alone",
        3: "in fricatives alone",
        4: "in both plosives and fricatives"
    }

    def run(self, inv):
        """Return the key in `categories` that applies to the consonants of `inv`."""
        # Collect the manners (stop and/or fricative) for which a voiced consonant exists.
        voiced_manners = set()
        for sound in inv.consonants:
            if sound.manner in ('stop', 'fricative') and is_voiced(sound):
                voiced_manners.add(sound.manner)

        voiced_stops = 'stop' in voiced_manners
        voiced_fricatives = 'fricative' in voiced_manners
        if voiced_stops and voiced_fricatives:
            return 4
        if voiced_stops:
            return 2
        if voiced_fricatives:
            return 3
        return 1
class HasPtk(WithInventory):
    """
    Classify an inventory by the presence of [p], [g] and further plain plosives.

    .. seealso:: `WALS 5A - Voicing and Gaps in Plosive Systems <https://wals.info/feature/5A>`_
    """
    doc = "WALS Feature 5A, presence of certain sounds."
    categories = {
        1: "no p and no g in the inventory",
        2: "no g in the inventory",
        3: "no p in the inventory",
        4: "has less than 6 values of [p t t̪ k b d d̪ g]",
        5: "has at least 6 values of [p t t̪ k b d d̪ g]"
    }

    def run(self, inv):
        """Return the key in `categories` that applies to the consonants of `inv`."""
        symbols = {sound.obj.s for sound in inv.consonants}
        has_p, has_g = 'p' in symbols, 'g' in symbols
        if not has_p and not has_g:
            return 1
        if not has_g:
            return 2
        if not has_p:
            return 3
        # Both [p] and [g] are present: count the distinct plosives of the reference set.
        attested = symbols & {'p', 't', 't̪', 'k', 'b', 'd', 'g', 'd̪'}
        return 5 if len(attested) >= 6 else 4
class HasUvular(WithInventory):
    """
    Classify an inventory by the manners of articulation of its uvular consonants.

    .. seealso:: `WALS 6A - Uvular Consonants <https://wals.info/feature/6A>`_
    """
    categories = {
        1: "no uvulars",
        2: "has one uvular and this one is a stop",
        3: "has one uvular and this one is no stop",
        4: "has uvulars"
    }

    def run(self, inv):
        """Return the key in `categories` that applies to the consonants of `inv`."""
        # Note that distinct manners of the uvular consonants are counted, not the sounds.
        manners = {sound.manner for sound in inv.consonants if is_uvular(sound)}
        if not manners:
            return 1
        if len(manners) > 1:
            return 4
        return 2 if 'stop' in manners else 3
class HasGlottalized(WithInventory):
    """
    Classify an inventory by the presence of ejective stops/affricates, other ejective
    consonants ("resonants") and implosives.

    .. seealso:: `WALS 7A - Glottalized Consonants <https://wals.info/feature/7A>`_
    """
    categories = {
        1: "no ejectives, no implosives",
        2: "has ejective stops or affricates, but no implosives",
        3: "has implosive stops or affricates but no ejectives",
        4: "has ejectives resonants",
        5: "has ejectives and implosives but no ejective resonants",
        6: "has ejectives and ejective resonants, but no implosives",
        7: "has implosives and ejective resonants but no ejective stops",
        8: "has implosvies, ejective resonants, and ejective stops"
    }

    def run(self, inv):
        """Return the key in `categories` that applies to the consonants of `inv`."""
        has_ejectives = has_resonants = has_implosives = False
        for sound in inv.consonants:
            if is_ejective(sound):
                # Ejective stops and affricates are kept apart from all other ejectives.
                if stop_like(sound):
                    has_ejectives = True
                else:
                    has_resonants = True
            if is_implosive(sound):
                has_implosives = True

        # Keys are (ejective stops/affricates, implosives, ejective resonants).
        value_by_pattern = {
            (False, False, False): 1,
            (True, False, False): 2,
            (False, True, False): 3,
            (False, False, True): 4,
            (True, True, False): 5,
            (True, False, True): 6,
            (False, True, True): 7,
            (True, True, True): 8,
        }
        return value_by_pattern[(has_ejectives, has_implosives, has_resonants)]
class HasLaterals(WithInventory):
    """
    Classify an inventory by its lateral consonants: presence of [l] and of lateral
    stops or affricates.

    .. seealso:: `WALS 8A - Lateral Consonants <https://wals.info/feature/8A>`_
    """
    categories = {
        1: "no laterals",
        2: "only lateral [l]",
        3: "has laterals, but no stops in laterals and no [l]",
        4: "has laterals, including [l] and stops",
        5: "has laterals, including stops, but no [l]",
        # Value 6 is only returned when [l] is in the inventory (see `run`), so the label
        # must not claim that [l] is missing.
        6: "has laterals, including [l], but no stops"
    }

    def run(self, inv):
        """Return the key in `categories` that applies to the inventory `inv`."""
        # Distinct manners of articulation of all lateral consonants.
        lateral_manners = {
            sound.obj.manner for sound in inv.consonants if is_lateral(sound)}
        if not lateral_manners:
            return 1

        has_l = 'l' in inv.sounds
        has_lateral_stops = 'stop' in lateral_manners or 'affricate' in lateral_manners
        if len(lateral_manners) == 1 and has_l:
            return 2
        if not has_lateral_stops and not has_l:
            return 3
        if has_lateral_stops:
            return 4 if has_l else 5
        # Remaining case: [l] is present, there are no lateral stops or affricates, and the
        # laterals have more than one manner of articulation.
        return 6
class HasEngma(util.FeatureFunction):
    """
    Classify a language by the presence and the position of the velar nasal.

    .. seealso:: `WALS 9A - The Velar Nasal <https://wals.info/feature/9A>`_
    """
    categories = {
        1: "velar nasal occurs in syllable-initial position",
        2: "velar nasal occurs but not in syllable-initial position",
        3: "velar nasal is missing"
    }

    @requires(inventory_with_occurrences)
    def __call__(self, language):
        """Return the key in `categories` that applies to `language`."""
        inv = language.sound_inventory
        if not any(sound.obj.s == 'ŋ' for sound in inv.consonants):
            return 3
        # NOTE(review): occurrences are unpacked as (position, id) pairs; position 0 is
        # presumably the first position within a form - confirm against the inventory code.
        for position, _fid in inv.sounds['ŋ'].occurrences:
            if position == 0:
                return 1
        return 2
class HasSoundsWithFeature(WithInventory):
    """
    Does the inventory contain at least one {}.

    .. code-block:: python

        prenasalized_consonants = phonology.HasSoundsWithFeature("consonants", [["pre-nasalized"]])
    """
    # The class docstring doubles as format template for `doc` (see `__init__`), which is why
    # it contains a placeholder and must not contain any other curly braces.

    def __init__(self, attr, features):
        # attr: name of the inventory attribute holding the sounds to inspect.
        # features: list of feature bundles; a sound matches if it has all features of at
        # least one bundle.
        super().__init__(attr, features)
        self.attr = attr
        self.features = features
        self.rtype = bool
        bundles = ' or '.join(' '.join(bundle) for bundle in self.features)
        sound_spec = f'{bundles} {self.attr}'
        self.doc = textwrap.dedent(self.__doc__.format(sound_spec)).strip()
        self.categories = {
            True: f'has {sound_spec}',
            False: f'does not have {sound_spec}',
        }

    def run(self, inv):
        # True as soon as one sound carries all features of one of the bundles.
        return any(
            set(bundle).issubset(sound.featureset)
            for sound in getattr(inv, self.attr)
            for bundle in self.features)
class HasRoundedVowels(WithInventory):
    """
    Classify an inventory by two groups of rounded vowels.

    .. seealso:: `WALS 11A - Front Rounded Vowels <https://wals.info/feature/11A>`_
    """
    categories = {
        1: "no high and no mid vowels",
        2: "high and mid vowels",
        3: "high and no mid vowels",
        4: "mid and no high vowels"
    }
    doc = "WALS Feature 11A, check for front rounded vowels."

    def run(self, inv):
        """Return the key in `categories` that applies to the vowels of `inv`."""
        # NOTE(review): both groups are selected by `centrality` (front/near-front vs.
        # central), while the category labels speak of high and mid vowels - confirm that
        # this is intended.
        has_high = has_mid = False
        for sound in inv.vowels:
            if sound.obj.roundedness != 'rounded':
                continue
            centrality = sound.obj.centrality
            if centrality in ('front', 'near-front'):
                has_high = True
            elif centrality == 'central':
                has_mid = True

        if has_high:
            return 2 if has_mid else 3
        return 4 if has_mid else 1
def syllable_complexity(forms_with_sounds):
    """
    Compute the major syllabic patterns for a language.

    .. note::

        The computation follows the automated syllabification process described in
        List (2014) based on sonority. Based on this syllabification, we calculate
        the number of consonants preceding the syllable nucleus and those following
        it. For a given syllable, we store the form, the consonantal sounds, and
        the index of the syllable in the word. These values are returned in the
        form of two dictionaries, in which the number of sounds is the key.

    :param forms_with_sounds: Iterable of forms providing `sound_objects`.
    :return: Pair `(preceding, following)` of dictionaries mapping a number of sounds to a \
    list of `(form, sounds, syllable_index)` triples. The sounds following the nucleus are \
    listed in reversed order.
    """
    onsets = collections.defaultdict(list)
    codas = collections.defaultdict(list)
    for form in forms_with_sounds:
        segments = [s for s in form.sound_objects if s.type != "marker"]
        cursor = 0  # position of the next unconsumed sound of the form
        for syllable_index, syllable in enumerate(iter_syllables(form)):
            # Take one sound of the form for each token of the syllable.
            members = []
            for _ in syllable:
                members.append(segments[cursor])
                cursor += 1

            # Onset: leading sounds up to the first vowel, diphthong, tone, marker or
            # syllabic sound.
            onset = []
            for sound in members:
                if sound.type in ['vowel', 'diphthong', 'tone', 'marker'] or \
                        'syllabic' in sound.obj.featureset:
                    break
                onset.append(sound)
            onsets[len(onset)].append((form, onset, syllable_index))

            # Coda: walk backwards to the last vowel or diphthong, skipping (but not
            # stopping at) tones and markers.
            coda = []
            for sound in reversed(members):
                if sound.type in ['vowel', 'diphthong']:
                    break
                if sound.type not in ['tone', 'marker']:
                    coda.append(sound)
            codas[len(coda)].append((form, coda, syllable_index))
    return onsets, codas
class WithSyllableComplexity(util.FeatureFunction):
    """
    Abstract base for feature callables computed from the result of
    :func:`syllable_complexity`.

    Subclasses implement :meth:`run`, which receives the two dictionaries returned by
    :func:`syllable_complexity` for ``language.forms_with_sounds``.
    """
    def run(self, preceding, following):
        """Compute the feature value (implemented by subclasses)."""
        raise NotImplementedError()  # pragma: no cover

    @requires(graphemes)
    def __call__(self, language):
        preceding, following = syllable_complexity(language.forms_with_sounds)
        return self.run(preceding, following)
class SyllableStructure(WithSyllableComplexity):
    """
    Classify the syllable structure of a language by its largest onsets and codas.

    .. seealso::

        - :func:`syllable_complexity`
        - `WALS 12A - Syllable Structure <https://wals.info/feature/12A>`_
    """
    categories = {
        1: "simple syllable structure (only CV attested)",
        2: "moderately complex syllable structure (C(C)VC attested)",
        3: "complex syllable structure"
    }

    def run(self, preceding, following):
        """
        Return the key in `categories` for the syllable patterns of a language.

        :param preceding: Syllables grouped by the number of sounds before the nucleus.
        :param following: Syllables grouped by the number of sounds after the nucleus.
        """
        max_onset, max_coda = max(preceding), max(following)
        if max_coda == 0 and max_onset <= 1:
            return 1
        if max_onset == 1 and max_coda == 1:
            return 2
        if max_onset != 2:
            return 3
        # Two-sound onsets only count as moderately complex if the second sound is always
        # a glide or liquid.
        second_is_always_glide = all(
            is_glide(sounds[1]) for _form, sounds, _index in preceding[2])
        return 2 if second_is_always_glide else 3
class SyllableOnset(WithSyllableComplexity):
    """
    Classify the complexity of the syllable onsets of a language.

    .. seealso::

        - :func:`syllable_complexity`
        - `APiCS 118 - Syllable onsets <https://apics-online.info/parameters/118>`_
    """
    categories = {
        1: "simple syllable onset (only CV attested)",
        2: "moderately complex syllable onset (C(C)V attested)",
        3: "complex syllable onset"
    }

    def run(self, onsets, following):
        """
        Return the key in `categories` for the onsets of a language.

        :param onsets: Syllables grouped by the number of sounds before the nucleus.
        :param following: Syllables grouped by the number of sounds after the nucleus (unused).
        """
        largest = max(onsets)
        if largest <= 1:
            return 1
        if largest > 2:
            return 3
        # Two-sound onsets are moderately complex only if the second sound is always a
        # glide or liquid.
        second_is_always_glide = all(
            is_glide(sounds[1]) for _form, sounds, _index in onsets[2])
        return 2 if second_is_always_glide else 3
class SyllableOffset(WithSyllableComplexity):
    """
    Classify the complexity of the syllable offsets (codas) of a language.

    .. seealso::

        - :func:`syllable_complexity`
        - `APiCS 119 - Syllable codas <https://apics-online.info/parameters/119>`_
    """
    categories = {
        1: "simple syllable offset (only CV attested)",
        2: "moderately complex syllable offset (CVC attested)",
        3: "slightly complex syllable offset (CV(C)C attested)",
        4: "complex syllable offset"
    }

    def run(self, onsets, offsets):
        """
        Return the key in `categories` for the offsets of a language.

        :param onsets: Syllables grouped by the number of sounds before the nucleus (unused).
        :param offsets: Syllables grouped by the number of sounds after the nucleus.
        """
        largest = max(offsets)
        if largest == 0:
            return 1
        if largest == 1:
            return 2
        if largest != 2:
            return 4
        # important: the representation lists offsets in opposite order, so
        # "karb" is rendered as "br"!
        for _form, sounds, _index in offsets[2]:
            last, first = sounds[0], sounds[1]
            tolerated = (
                is_glide(first)
                or is_nasal(first)
                or (stop_like(first) and stop_like(last)))
            if not tolerated:
                return 4
        return 3
class LacksCommonConsonants(WithInventory):
    """
    Classify an inventory by the absence of bilabials, fricatives and nasals.

    .. seealso:: `WALS 18A - Absence of Common Consonants <https://wals.info/feature/18A>`_
    """
    categories = {
        1: "bilabials and fricatives and nasals occur",
        2: "bilabials do not occur, fricatives and nasals occur",
        3: "fricatives do not occur, bilabials and nasals occur",
        4: "nasals do not occur, bilabials and fricatives occur",
        5: "bilabials and nasals do not occur, fricatives occur",
        6: "all other cases"
    }

    def run(self, inv):
        """Return the key in `categories` that applies to the consonants of `inv`."""
        has_bilabials = has_fricatives = has_nasals = False
        for sound in inv.consonants:
            featureset = sound.obj.featureset
            if 'bilabial' in featureset:
                has_bilabials = True
            if 'fricative' in featureset:
                has_fricatives = True
            if 'nasal' in featureset:
                has_nasals = True

        # Keys are (bilabials, fricatives, nasals); every other combination is value 6.
        value_by_pattern = {
            (True, True, True): 1,
            (False, True, True): 2,
            (True, False, True): 3,
            (True, True, False): 4,
            (False, True, False): 5,
        }
        return value_by_pattern.get((has_bilabials, has_fricatives, has_nasals), 6)
class HasUncommonConsonants(WithInventory):
    """
    Classify an inventory by the presence of clicks, labiovelars, pharyngeals and
    (non-sibilant) dental fricatives.

    .. seealso:: `WALS 19A - Presence of Uncommon Consonants <https://wals.info/feature/19A>`_
    """
    # The labels are keyed by the values which `run` actually returns for the respective
    # combination of sound classes.
    categories = {
        1: "no clicks and no dental fricatives and no labiovelars and no pharyngeals",
        2: "clicks",
        3: "labiovelars",
        4: "pharyngeals",
        5: "dental fricatives",
        6: "clicks and pharyngeals and dental fricatives",
        7: "pharyngeals and dental fricatives"
    }

    def run(self, inv):
        """Return the key in `categories` that applies to the consonants of `inv`."""
        has_clicks = has_labiovelars = has_dentalfrics = has_pharyngeals = False
        for sound in inv.consonants:
            segment = sound.obj
            if segment.manner == "click":
                has_clicks = True
            # Labialized velar or uvular consonants.
            if segment.labialization == "labialized" and segment.place in ["velar", "uvular"]:
                has_labiovelars = True
            # Dental fricatives, excluding sibilants.
            if segment.place == "dental" and segment.manner == "fricative" \
                    and not segment.airstream == "sibilant":
                has_dentalfrics = True
            # Pharyngeal place of articulation or secondary pharyngealization.
            if segment.place == "pharyngeal" or segment.pharyngealization == "pharyngealized":
                has_pharyngeals = True

        if not (has_clicks or has_dentalfrics or has_labiovelars or has_pharyngeals):
            return 1
        # The combined classes are checked first; after that the first single class that is
        # present decides, in the order dental fricatives, pharyngeals, labiovelars, clicks.
        if has_clicks and has_pharyngeals and has_dentalfrics:
            return 6
        if has_pharyngeals and has_dentalfrics:
            return 7
        if has_dentalfrics:
            return 5
        if has_pharyngeals:
            return 4
        if has_labiovelars:
            return 3
        return 2