#! /usr/bin/gawk -f
# Last edited on 1999-12-08 23:26:28 by stolfi

# Reads a stream of words, factors them into `Grove elements'.
#
# Input:  one word per record.  Records that are empty or consist only
#         of spaces are skipped.
# Output: the pieces of each word, separated by line breaks.  Every brace
#         produced by grove_factor_text() is turned into a line break, so
#         elements and separator runs end up on lines of their own and
#         blank lines appear in between.
#
# NOTE: the layout of this file matters.  In awk a "#" comment extends to
# the end of the physical line and a regexp constant may not span lines,
# so every comment below must stay on its own line.

# Skip records that are empty or contain only spaces.
/^ *$/ { next; }

/./ {
  # Break word into elements:
  $0 = grove_factor_text($0);
  # Delete dummy (empty) factors:
  gsub(/{[_]*}/, "", $0);
  gsub(/[_][_]*/, "", $0);
  # Insert breaks between elements (each single brace becomes "}\n{"):
  gsub(/[}{]/, "}\n{", $0);
  # Delete braces:
  gsub(/[{}]*/, "", $0);
  # Let empty strings stay empty, so that they are discarded.
  # (Presumably by whatever consumes this output -- nothing in this
  # script removes the blank lines.)
  print $0;
}

function error(msg)
{
  # Reports "line NR: msg" on /dev/stderr and terminates with status 1.
  # Also sets the global "abort" to 1; no rule visible in this file
  # reads it.
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit 1;
}

function grove_factor_text(x,   y,e)
{
  # Decomposes "x" into Grove-style elements with "e"s and "i"s
  # attached to the following letter. Assumes "x" is uncapitalized EVA
  # without "%"-fillers. Allows EVA spaces and "/".
  #
  # Parameters:
  #   x    the word (or text) to factor.
  #   y,e  local variables (awk idiom: extra parameters act as locals).
  #
  # Returns "x" with each element wrapped in braces.  Each stretch of
  # letters is preceded by an "initial" element, which is "{q}" if the
  # stretch starts with "q" and the dummy "{_}" otherwise.  Runs of the
  # separators "-=/,. " are copied through without braces.
  # Example: "ok.ar" gives "{_}{o}{k}.{_}{a}{r}".
  #
  # Calls error(), which exits, if "x" contains a character outside
  # "-=/,. *?%a-z" after comments and fillers have been removed.

  # Removes comments and fillers, just in case:
  gsub(/{[^{}]*}/, "", x);
  gsub(/[!]/, "", x);
  if (match(x, /[^-=\/,. *?%a-z]/)) { error(("invalid char in word \"" x "\"")); }

  # Map "sh" "ch" to single letters to simplify the parsing.
  gsub(/ch/, "C", x);
  gsub(/sh/, "S", x);

  # Map platformed and half-platformed letters to capitals to simplify the parsing:
  # (Longer groups are folded before their two-letter prefixes.)
  gsub(/ckh/, "K", x);
  gsub(/cth/, "T", x);
  gsub(/cfh/, "F", x);
  gsub(/cph/, "P", x);
  #
  gsub(/ikh/, "G", x);
  gsub(/ith/, "H", x);
  gsub(/ifh/, "M", x);
  gsub(/iph/, "N", x);
  #
  gsub(/ck/, "U", x);
  gsub(/ct/, "V", x);
  gsub(/cf/, "X", x);
  gsub(/cp/, "Y", x);
  #
  gsub(/ik/, "A", x);
  gsub(/it/, "B", x);
  gsub(/if/, "I", x);
  gsub(/ip/, "J", x);

  y = "";
  while (x != "")
    {
      # printf "x = [%s]\n", x > "/dev/stderr";
      if (match(x, /^[-=\/,. ]+/))
        {
          # A run of separators is copied to the output unchanged.
          e = substr(x,1,RLENGTH);
          x = substr(x, RLENGTH+1);
          y = ( y e );
        }
      else
        {
          # split off initial if any:
          if (match(x, /^[q]/))
            { e = substr(x,1,RLENGTH); x = substr(x, RLENGTH+1); }
          else
            { e = "_"; }
          y = ( y "{" e "}");
          while (1)
            {
              # copy next letter, with any [ie] prefixes and [h] suffixes.
              # The middle class is the complement of the separator class
              # above, so this loop stops at a separator or at the end of "x".
              if (match(x, /^[ie]*[^-=\/,. ][h]*/))
                { e = substr(x,1,RLENGTH); x = substr(x, RLENGTH+1); }
              else
                { break; }
              y = ( y "{" e "}");
            }
        }
    }

  # Unfold letter folding:
  # (The validated input has no capitals, so every capital in "y" is one
  # of the folded codes and the order of these substitutions is immaterial.)
  gsub(/A/, "ik", y);
  gsub(/B/, "it", y);
  gsub(/I/, "if", y);
  gsub(/J/, "ip", y);
  #
  gsub(/U/, "ck", y);
  gsub(/V/, "ct", y);
  gsub(/X/, "cf", y);
  gsub(/Y/, "cp", y);
  #
  gsub(/G/, "ikh", y);
  gsub(/H/, "ith", y);
  gsub(/M/, "ifh", y);
  gsub(/N/, "iph", y);
  #
  gsub(/K/, "ckh", y);
  gsub(/T/, "cth", y);
  gsub(/P/, "cph", y);
  gsub(/F/, "cfh", y);
  #
  gsub(/C/, "ch", y);
  gsub(/S/, "sh", y);
  return y;
}