#! /usr/bin/gawk -f
# Last edited on 1999-12-09 20:57:27 by stolfi

# Reads a stream of words, one per line; outputs their "cec"
# features, one per line.
#
# For each input word the script prints several lines.  The word is
# cut after every non-"e" letter, and that letter is repeated at the
# start of the next piece, so each output line has the form
#
#     <letter>:<run of zero or more "e">:<letter>
#
# where <letter> may be a compound such as "ch" or "ckh", and "_"
# stands for the word boundary.  Example: the word "qokeey" yields
#
#     _::o
#     o::k
#     k:ee:y
#     y::_
#
# Requires gawk (uses gensub()).

# cec_features(word): returns the feature lines for one word as a
# single string, with the lines separated by "\n" (no trailing newline).
#
# NOTE: the capital letters K T P F A B D G U V W Y S C are used
# internally as one-character placeholders for compound letters.  Any
# of these capitals already present in the input will be rewritten
# into the corresponding compound by the final expansion step.
function cec_features(word) {
  # Delete leading "q" (only one, only at the very start):
  sub(/^q/, "", word)

  # Condense "ch", "sh", and platform gallows into single letters.
  # Order matters: the three-letter groups must be replaced before
  # the two-letter groups "ck"/"ct"/"cp"/"cf", and those before "ch".
  gsub(/ckh/, "K", word)
  gsub(/cth/, "T", word)
  gsub(/cph/, "P", word)
  gsub(/cfh/, "F", word)

  gsub(/ikh/, "A", word)
  gsub(/ith/, "B", word)
  gsub(/iph/, "D", word)
  gsub(/ifh/, "G", word)

  gsub(/ck/, "U", word)
  gsub(/ct/, "V", word)
  gsub(/cp/, "W", word)
  gsub(/cf/, "Y", word)

  gsub(/sh/, "S", word)
  gsub(/ch/, "C", word)

  # Double the non-"e" letters and insert breaks between them:
  # each such letter X becomes ":X" <newline> "X:".
  word = gensub(/([^e])/, ":\\1\n\\1:", "g", word)

  # Insert "_" around every word.  Plain concatenation is used
  # because the markers belong only at the two ends of the whole
  # string, not at the embedded newlines.
  word = "_:" word ":_"

  # Expand back the compound letters.  Every expansion produces only
  # lowercase letters, so the order of these steps is irrelevant.
  gsub(/S/, "sh", word)
  gsub(/C/, "ch", word)

  gsub(/U/, "ck", word)
  gsub(/V/, "ct", word)
  gsub(/W/, "cp", word)
  gsub(/Y/, "cf", word)

  gsub(/A/, "ikh", word)
  gsub(/B/, "ith", word)
  gsub(/D/, "iph", word)
  gsub(/G/, "ifh", word)

  gsub(/K/, "ckh", word)
  gsub(/T/, "cth", word)
  gsub(/P/, "cph", word)
  gsub(/F/, "cfh", word)

  return word
}

# Skip lines that are empty or contain only spaces.
/^ *$/ { next }

# Every other line is treated as one word.
/./ { print cec_features($0) }