# Last edited on 2012-05-05 19:52:58 by stolfilocal

# To be included in factor-field-general etc.

# Factors a Chinese pinyin word into elements, each enclosed in "{}":
# the initial consonant (if present), every medial vowel as a separate
# element, the final consonant (if present), and the tone/disambiguation
# suffix.  When the word has no suffix, a dummy `{.}' element is
# produced in its place.
#
# Only "x" is meant to be supplied by the caller; "out" and "tok" are
# awk-style local variables.  Returns the factored string.  Input that
# cannot be fully consumed is reported through data_error(), which is
# expected to be defined by the including script.

function factor_text(x,   out, tok)
{
  out = "";

  # Initial consonant, if any.  "y" and "w" do not count as consonants
  # here.  A word that starts with "ng", or that is just "n", is left
  # untouched at this stage so that it is picked up as a final below.
  # Every pattern in this function is anchored at the start of "x", so
  # a successful match always begins at position 1 and RLENGTH alone
  # tells how much to cut off.
  if ((! match(x, /^[nN]([gG]|$)/)) && match(x, /^([cszCSZ][hH]|[b-df-hj-np-txzB-DF-HJ-NP-TXZ])/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = (out "{" tok "}");
  }

  # Vowels, one element per vowel.  "yi" and "wu" each count as a
  # single vowel, and "ü"/"ê" may also be spelled "u:"/"e^".
  while (match(x, /^([yY][iI]|[wW][uU]|[eE][\^]?|[uU][:]?|[aioywAIOYW]|ü|ê|Ü|Ê)/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = (out "{" tok "}");
  }

  # Final consonant, if any: "n", "ng" or "r".
  if (match(x, /^([nN][gG]?|[rR])/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = (out "{" tok "}");
  }

  # Tone code (one digit) followed by the disambiguating suffix (a `.'
  # and zero or more digits), taken together as one element.  Either
  # part may be missing from the input; a `.' is appended when the
  # remainder has none, so that the element always contains one.
  if (index(x, ".") == 0) {
    x = (x ".");
  }
  if (match(x, /^[0-9]?[.][0-9]*/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = (out "{" tok "}");
  }

  # By now all of "x" should have been consumed; anything left over
  # means the word could not be parsed.
  if (x != "") {
    data_error(("bad pinyin word \"" out x "\""));
  }

  return out;
}