# Last edited on 1999-12-10 08:04:13 by stolfi

# Usage: "gawk -f factor-elements.gawk ..."

function factor_text(x,    out, elem)
{
  # Decomposes the EVA text "x" into its QOKOKOKOF elements and returns
  # the marked-up string.
  #
  # Input:  "x" is expected to be uncapitalized EVA.  Any "{...}" groups
  #         and "!" characters are deleted before parsing.  After that,
  #         only the characters "-=/,. *?%" and "a"-"z" are accepted;
  #         anything else is reported through error(), which is defined
  #         outside this file (presumably it aborts -- verify).
  #
  # Output: runs of the separators "-", "=", "/", ",", "." and " " are
  #         copied through unchanged.  Every other stretch is rewritten as
  #
  #           {Q} V {K} V {K} ... V
  #
  #         where {Q} is "{q}" or "{_}", each V is an [aoy]-group (with
  #         optional [ic] prefix and [aoyeh] suffix) or "_" when absent,
  #         and each {K} is one main letter (with optional [ic] prefix and
  #         [he] suffix) in braces.  For example, "daiin" yields
  #         "{_}_{d}a{iin}_".
  #
  # "out" and "elem" are local variables, not arguments.

  gsub(/{[^{}]*}/, "", x);
  gsub(/[!]/, "", x);
  if (match(x, /[^-=\/,. *?%a-z]/))
    {
      error(("invalid char in word \"" x "\""));
    }

  # Fold the digraphs "ch", "sh", "ee" into the single letters "C", "S",
  # "E" so that each behaves as one letter in the patterns below.  The
  # check above guarantees no capitals were present beforehand.
  gsub(/ch/, "C", x);
  gsub(/sh/, "S", x);
  gsub(/ee/, "E", x);

  out = "";
  while (x != "")
    {
      # printf "x = [%s]\n", x > "/dev/stderr";

      # A run of separators is passed through verbatim.
      if (match(x, /^[-=\/,. ]+/))
        {
          out = out substr(x, 1, RLENGTH);
          x = substr(x, RLENGTH + 1);
          continue;
        }

      # Leading "q" slot; "_" marks its absence.
      elem = "_";
      if (match(x, /^[q]/))
        {
          elem = substr(x, 1, RLENGTH);
          x = substr(x, RLENGTH + 1);
        }
      out = out "{" elem "}";

      for (;;)
        {
          # "[aoy]" group with optional [ci] prefix and [he] suffix;
          # "_" marks an empty slot.
          elem = "_";
          if (match(x, /^[ic]*[aoy][aoyeh]*/))
            {
              elem = substr(x, 1, RLENGTH);
              x = substr(x, RLENGTH + 1);
            }
          out = out elem;

          # Next main letter with optional [ci] prefix and [he] suffix.
          # When there is none, "x" is empty or starts with a separator,
          # so control goes back to the outer loop.
          if (!match(x, /^[ic]*[^-=\/,. aoy][he]*/))
            break;
          out = out "{" substr(x, 1, RLENGTH) "}";
          x = substr(x, RLENGTH + 1);
        }
    }

  # Undo the digraph folding.
  gsub(/E/, "ee", out);
  gsub(/C/, "ch", out);
  gsub(/S/, "sh", out);
  return out;
}