Skip to content

Instantly share code, notes, and snippets.

@alpha123
Created June 30, 2017 05:17
Show Gist options
  • Save alpha123/e8507c69e5c9867448761d0953c38f02 to your computer and use it in GitHub Desktop.
Save alpha123/e8507c69e5c9867448761d0953c38f02 to your computer and use it in GitHub Desktop.
Ruby IPA parser
# encoding: utf-8
require 'rubygems'
gem 'ruby-enum'
gem 'unicode'
require 'ruby-enum'
require 'unicode'
module IPA
# Chart of voiced consonant symbols. Each row is a manner of articulation and
# each column a place of articulation; the row/column positions line up with
# the integer values defined by the MOA and POA enums further down this file.
# An empty string marks a cell with no symbol. Voiceless symbols are not stored
# here; they are reached through VoicingPairs / VoicingPairsInv.
# NOTE(review): 'n' (row 0) and 'd' (row 1) each appear in two adjacent columns.
# Array#index returns the first match, so the second occurrence can never be
# produced by a lookup -- confirm whether a diacritic was lost there.
Consonants = [
['m', '', '', 'n', 'n', 'ɳ', 'ȵ', 'ɲ', 'ŋ', 'ɴ', '' ],
['b', '', 'd̪', 'd', 'd', 'ɖ', 'ȡ', 'ɟ', 'g', 'ɢ', 'ʔ'],
['', '', '', 'z', 'ʒ', 'ʐ', 'ʑ', '', '', '', '' ],
['β', 'v', 'ð', 'ð̠', 'ɹ̠˔','ɻ˔','ʝ̟', 'ʝ', 'ɣ', 'ʁ', 'ɦ'],
['', '', '', 'ɮ', '', 'ɭ˔','', 'ʎ̝', 'ʟ̝', '', ''],
['β̞', 'ʋ', 'ð̞', 'ɹ', 'ɹ̠', 'ɻ', '', 'j', 'ɰ', 'ʁ̞', ''],
['', '', 'l̪', 'l', 'l̠', 'ɭ', 'ȴ', 'ʎ', 'ʟ', 'ʟ̠', ''],
['ⱱ̟', 'ⱱ', '', 'ɾ', '', 'ɽ', '', '', '', '', ''],
['ʙ', '', '', 'r', '', 'ɽ͡r','', '', '', 'ʀ', '']
].map! { |p| p.freeze }.freeze
# Row-major flattening of the chart, used for symbol -> index lookups.
FlatConsonants = Consonants.flatten.freeze
# Number of columns per row; converts a flat index back into (row, column).
ConsonantsRowLen = Consonants[0].size
# Maps each voiceless symbol to its voiced counterpart (voiceless => voiced).
# NOTE(review): 'ʍ' maps to 'w', which does not appear in Consonants -- confirm
# whether that entry is reachable.
VoicingPairs = {
'p' => 'b', 't' => 'd', 'ʈ' => 'ɖ', 'ȶ' => 'ȡ', 'c' => 'ɟ', 'k' => 'g', 'q' => 'ɢ', 'ʔ' => 'ʔ',
'ɸ' => 'β', 'f' => 'v', 'θ' => 'ð', 'ç' => 'ʝ', 'x' => 'ɣ', 'χ' => 'ʁ', 'h' => 'ɦ',
's' => 'z', 'ʃ' => 'ʒ', 'ʂ' => 'ʐ', 'ɕ' => 'ʑ',
'ɬ' => 'ɮ',
'ʍ' => 'w'
}.freeze
# Reverse lookup (voiced => voiceless).
VoicingPairsInv = VoicingPairs.invert.freeze
# Secondary-articulation flags on Consonant and the modifier character that
# represents each one in IPA text. The keys double as Consonant attribute names.
ConsonantModifiers = {
:aspirated => 'ʰ', :ejective => 'ʼ', :palatalized => 'ʲ', :velarized => 'ˠ', :labialized => 'ʷ'
}.freeze
# Chart of unrounded vowel symbols. Each row is a height and each column a
# backness; the positions line up with the integer values defined by the
# Height and Backness enums further down this file. An empty string marks a
# cell with no symbol. Rounded symbols are not stored here; they are reached
# through RoundingPairs / RoundingPairsInv.
Vowels = [
['i', '', 'ɨ', '', 'ɯ'],
['' , 'ɪ', 'ɪ̈', 'ɯ̽', '' ],
['e', '', 'ɘ', '', 'ɤ'],
['e̞', '', 'ə', '', 'ɤ̞'],
['' , 'ɛ', 'ɜ', '', 'ʌ'],
['' , 'æ', 'ɐ', '', '',],
['' , 'a', 'ä', '', 'ɑ']
].map! { |h| h.freeze }.freeze
# Row-major flattening of the chart, used for symbol -> index lookups.
FlatVowels = Vowels.flatten.freeze
# Number of columns per row; converts a flat index back into (row, column).
VowelsRowLen = Vowels[0].size
# Maps each unrounded symbol to its rounded counterpart (unrounded => rounded).
# NOTE(review): 'æ' maps to an empty string, so the inverted hash below
# contains '' => 'æ' -- confirm this is intended.
RoundingPairs = {
'i' => 'y', 'ɨ' => 'ʉ', 'ɯ' => 'u',
'ɪ' => 'ʏ', 'ɪ̈' => 'ʊ̈', 'ɯ̽' => 'ʊ',
'e' => 'ø', 'ɘ' => 'ɵ', 'ɤ' => 'o',
'e̞' => 'ø̞', 'ə' => 'ɵ̞', 'ɤ̞' => 'o̞',
'ɛ' => 'œ', 'ɜ' => 'ɞ', 'ʌ' => 'ɔ',
'æ' => '' , 'ɐ' => 'ɞ̞',
'a' => 'ɶ', 'ä' => 'ɒ̈', 'ɑ' => 'ɒ'
}.freeze
# Reverse lookup (rounded => unrounded).
RoundingPairsInv = RoundingPairs.invert.freeze
# Manner of articulation. Each value is the position of the name in the list
# below, starting at 0; Consonant uses it as the row index into Consonants.
class MOA
  include Ruby::Enum

  [
    :Nasal,
    :Stop,
    :SibilantFricative,
    :NonSibilantFricative,
    :LateralFricative,
    :Approximant,
    :LateralApproximant,
    :Flap,
    :Trill
  ].each_with_index { |manner, row| define manner, row }
end
# Place of articulation. Each value is the position of the name in the list
# below, starting at 0; Consonant uses it as the column index into Consonants.
class POA
  include Ruby::Enum

  [
    :Bilabial,
    :Labiodental,
    :Dental,
    :Alveolar,
    :PalatoAlveolar,
    :Retroflex,
    :AlveoloPalatal,
    :Palatal,
    :Velar,
    :Uvular,
    :Glottal
  ].each_with_index { |place, column| define place, column }
end
# A single consonant described by its articulatory features.
#
# +place+ and +manner+ are the column and row indices into the Consonants
# chart (the integer values of the POA and MOA enums). The chart stores voiced
# symbols; #to_s derives the voiceless form when +voiced+ is false.
class Consonant
  attr_accessor :place, :manner, :voiced, :aspirated, :ejective, :palatalized, :velarized, :labialized, :long
  # One of :none, :lateral, :sibilant or :non_sibilant.
  attr_reader :affricate_type

  alias voiced? voiced
  alias aspirated? aspirated
  alias ejective? ejective
  alias palatalized? palatalized
  alias velarized? velarized
  alias labialized? labialized
  alias long? long

  # poa:: column index into Consonants (a POA value)
  # moa:: row index into Consonants (a MOA value)
  #
  # Every boolean feature starts out false and the consonant is not an affricate.
  def initialize(poa, moa)
    @place = poa
    @manner = moa
    @voiced = false
    @affricate_type = :none
    ConsonantModifiers.each_key { |mod| send(:"#{mod}=", false) }
    @long = false
  end

  # Sets the kind of fricative release. Any type other than :none also forces
  # the manner to MOA::Stop, because the affricate is rendered as the stop at
  # this place followed by the release. Clearing the type (:none) leaves the
  # manner untouched so that a non-stop is not silently turned into a stop.
  def affricate_type=(type)
    @affricate_type = type
    @manner = MOA::Stop unless type == :none
  end

  def lateral_affricate; @affricate_type == :lateral; end

  # The three boolean setters go through self.affricate_type= explicitly; a
  # bare `affricate_type = ...` would only create a local variable.
  def lateral_affricate=(yesno)
    self.affricate_type = yesno ? :lateral : :none
  end

  def sibilant_affricate; @affricate_type == :sibilant; end

  def sibilant_affricate=(yesno)
    self.affricate_type = yesno ? :sibilant : :none
  end

  def non_sibilant_affricate; @affricate_type == :non_sibilant; end

  def non_sibilant_affricate=(yesno)
    self.affricate_type = yesno ? :non_sibilant : :none
  end

  alias lateral_affricate? lateral_affricate
  alias sibilant_affricate? sibilant_affricate
  alias non_sibilant_affricate? non_sibilant_affricate

  # Renders the consonant as IPA text: base symbol (devoiced if needed), then
  # for affricates a tie bar and the release fricative, then every active
  # modifier in ConsonantModifiers order, then the length mark.
  def to_s
    base = Consonants[@manner][@place]
    base = devoice(base) unless voiced?
    unless @affricate_type == :none
      release = Consonant.new(@place, release_manner)
      release.voiced = @voiced
      # U+0361 COMBINING DOUBLE INVERTED BREVE (tie bar)
      base += "\u0361" + release.to_s
    end
    suffix = ConsonantModifiers.map { |prop, diacritic| send(prop) ? diacritic : '' }.join
    suffix += 'ː' if long?
    base + suffix
  end

  # Two consonants are equal when place, manner, voicing, affricate type,
  # length and every ConsonantModifiers flag match. Anything that is not a
  # Consonant compares unequal instead of raising.
  def ==(other)
    return false unless other.is_a?(Consonant)
    features = [:place, :manner, :voiced, :affricate_type, :long] + ConsonantModifiers.keys
    features.all? { |feature| public_send(feature) == other.public_send(feature) }
  end

  private

  # Returns the voiceless form of a voiced chart symbol: the leading letter is
  # swapped for its VoicingPairsInv partner, or given a ring below (U+0325)
  # when it has none. Diacritics that followed the letter stay after it.
  def devoice(symbol)
    return symbol if symbol.empty?
    letter = symbol[0]
    marks = symbol[1..-1]
    (VoicingPairsInv[letter] || letter + "\u0325") + marks
  end

  # Chart row (MOA value) of the fricative release for the current affricate type.
  def release_manner
    case @affricate_type
    when :lateral then MOA::LateralFricative
    when :sibilant then MOA::SibilantFricative
    when :non_sibilant then MOA::NonSibilantFricative
    else raise ArgumentError, "unknown affricate type: #{@affricate_type.inspect}"
    end
  end
end
# Vowel height. Each value is the position of the name in the list below,
# starting at 0; Vowel uses it as the row index into Vowels.
class Height
  include Ruby::Enum

  [
    :Close,
    :NearClose,
    :CloseMid,
    :Mid,
    :OpenMid,
    :NearOpen,
    :Open
  ].each_with_index { |height, row| define height, row }
end
# Vowel backness. Each value is the position of the name in the list below,
# starting at 0; Vowel uses it as the column index into Vowels.
class Backness
  include Ruby::Enum

  [
    :Front,
    :NearFront,
    :Central,
    :NearBack,
    :Back
  ].each_with_index { |backness, column| define backness, column }
end
# A single vowel described by its articulatory features.
#
# +height+ and +backness+ are the row and column indices into the Vowels
# chart (the integer values of the Height and Backness enums). The chart
# stores unrounded symbols; #to_s maps through RoundingPairs when +rounded+
# is true. +tongue_root+ is :normal, :advanced or :retracted.
class Vowel
  attr_accessor :height, :backness, :rounded, :tongue_root, :nasalized, :rhotic, :long

  alias rounded? rounded
  alias nasalized? nasalized
  alias rhotic? rhotic
  alias long? long

  # height_::   row index into Vowels (a Height value)
  # backness_:: column index into Vowels (a Backness value)
  # rounded_::  whether the vowel is rounded
  #
  # The remaining features start out neutral: normal tongue root, not
  # nasalized, not rhotic, not long.
  def initialize(height_, backness_, rounded_)
    @height = height_
    @backness = backness_
    @rounded = rounded_
    @tongue_root = :normal
    @nasalized = false
    @rhotic = false
    @long = false
  end

  # Renders the vowel as IPA text: the (possibly rounded) base symbol followed
  # by the nasalization, tongue-root, rhotic and length marks, in that order.
  def to_s
    symbol = Vowels[@height][@backness]
    # Fall back to the chart symbol when no rounded counterpart is listed.
    symbol = RoundingPairs.fetch(symbol, symbol) if rounded?

    marks = ''
    marks += "\u0303" if nasalized?        # combining tilde
    case @tongue_root
    when :advanced then marks += "\u0318"  # combining left tack below
    when :retracted then marks += "\u0319" # combining right tack below
    end
    marks += '˞' if rhotic?
    marks += 'ː' if long?
    symbol + marks
  end

  # Two vowels are equal when height, backness, rounding, tongue root,
  # nasalization, rhoticity and length all match. Anything that is not a
  # Vowel compares unequal instead of raising.
  def ==(other)
    return false unless other.is_a?(Vowel)
    [:height, :backness, :rounded, :tongue_root, :nasalized, :rhotic, :long].all? do |feature|
      public_send(feature) == other.public_send(feature)
    end
  end
end
# Locates a bare IPA symbol (no modifiers) in the vowel or consonant chart.
#
# Returns a four-element array [is_vowel, row, column, marked]:
# * is_vowel -- true when the symbol was found via the vowel chart
# * row, column -- position in Vowels or Consonants
# * marked -- for vowels, whether the symbol is the rounded member of its
#   pair; for consonants, whether it is the voiced member
#
# Vowels are tried before consonants. The charts hold only unrounded vowels
# and voiced consonants, so rounded vowels are mapped back through
# RoundingPairsInv and voiceless consonants forward through VoicingPairs.
#
# Raises ArgumentError for nil, an empty string (which would otherwise match
# a blank chart cell) or a symbol found in neither chart.
def self.articulation_of(ipa)
  raise ArgumentError, 'empty IPA symbol' if ipa.nil? || ipa.empty?

  idx = FlatVowels.index(ipa)
  return [true, idx / VowelsRowLen, idx % VowelsRowLen, false] if idx

  unrounded = RoundingPairsInv[ipa]
  idx = FlatVowels.index(unrounded) if unrounded
  return [true, idx / VowelsRowLen, idx % VowelsRowLen, true] if idx

  idx = FlatConsonants.index(ipa)
  return [false, idx / ConsonantsRowLen, idx % ConsonantsRowLen, true] if idx

  voiced = VoicingPairs[ipa]
  idx = FlatConsonants.index(voiced) if voiced
  return [false, idx / ConsonantsRowLen, idx % ConsonantsRowLen, false] if idx

  raise ArgumentError, "unrecognized IPA symbol: #{ipa.inspect}"
end
# Hide the chart lookup from outside callers; in this file it is only called
# from parse_single with an implicit receiver.
private_class_method :articulation_of
# Builds a Vowel or Consonant from one group of text elements.
#
# ipa:: an array whose first element is the base letter (possibly carrying
#       combining diacritics) and whose remaining elements are modifiers. For
#       an affricate the base ends with a tie bar and the next element is the
#       release letter.
#
# Raises ArgumentError when a tie-bar release is not one of the three
# fricative manners, since Consonant cannot represent it. Errors raised by
# articulation_of for an unknown base or release propagate unchanged.
def self.parse_single(ipa)
  base, *modifiers = ipa
  atr = base.include?("\u0318")    # combining left tack below
  rtr = base.include?("\u0319")    # combining right tack below
  nasal = base.include?("\u0303")  # combining tilde

  # Strip the feature diacritics, the rhotic hook and the affricate tie bar
  # (U+0361) so that only the chart symbol is looked up.
  bare = base.gsub(/[\u0318\u0319\u0303\u02DE\u0361]/, '')
  is_vowel, row, column, marked = articulation_of(bare)

  if is_vowel
    v = Vowel.new(row, column, marked)
    v.tongue_root = if atr then :advanced elsif rtr then :retracted else :normal end
    v.nasalized = nasal
    v.rhotic = modifiers.include?('˞')
    v.long = modifiers.include?('ː')
    return v
  end

  c = Consonant.new(column, row)
  c.voiced = marked

  # A lowercase letter directly after the base is an affricate release.
  # abbr_categories is treated as returning a list of category symbols, so
  # test membership rather than equality.
  if !modifiers.empty? && Unicode.abbr_categories(modifiers.first).include?(:Ll)
    release = modifiers.shift
    # Index 1 of the lookup result is the chart row, i.e. the manner.
    c.affricate_type = case articulation_of(release)[1]
                       when MOA::LateralFricative then :lateral
                       when MOA::SibilantFricative then :sibilant
                       when MOA::NonSibilantFricative then :non_sibilant
                       else raise ArgumentError, "unsupported affricate release: #{release.inspect}"
                       end
  end

  ConsonantModifiers.each do |prop, char|
    c.send("#{prop}=", true) if modifiers.include?(char)
  end
  c.long = true if modifiers.include?('ː')
  c
end
# Parses IPA text into an array of objects, one per sound, by splitting the
# text into groups and handing each group to parse_single.
#
# ipa:: a Regexp (its source is used) or anything else, converted with to_s
#
# A group is one base text element followed by its modifiers:
# * a text element whose categories include Lm (modifier letter) or Sk
#   (modifier symbol) joins the current group;
# * a text element that follows one ending in a tie bar (U+0361) also joins
#   the current group -- it is the second half of a tied pair;
# * anything else starts a new group.
#
# Raises ArgumentError when the text begins with a modifier, because there is
# no base letter for it to attach to.
def self.parse(ipa)
  text = ipa.is_a?(Regexp) ? ipa.source : ipa.to_s
  tie_bar = "\u0361"

  groups = Unicode.text_elements(text).each_with_object([]) do |grapheme, grps|
    current = grps.last
    if !(Unicode.abbr_categories(grapheme) & [:Lm, :Sk]).empty?
      raise ArgumentError, "modifier #{grapheme.inspect} has no preceding base letter" if current.nil?
      current << grapheme
    elsif current && current.last[-1] == tie_bar
      current << grapheme
    else
      grps << [grapheme]
    end
  end

  groups.map(&method(:parse_single))
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment