#! /usr/bin/gawk -f
# Last edited on 2000-01-23 17:43:13 by stolfi

# Reads a stream of words, one per line, and extracts the "xox" features.
#
# For every non-blank input line the word is factored into brace-delimited
# elements, the elements are collapsed into the classes "M", "X" and "R",
# and the word is then cut so that every braced element ends one output
# line and begins the next.  Braces are finally turned into ":" and the
# word boundaries are marked with "#", so a word yields lines of the form
#
#     :<left>:<middle>:<right>:
#
# Requires gawk (uses gensub).  The function factor_text() is not defined
# in this file and must be supplied by another program file.
# NOTE(review): presumably factor_text() returns the word with each element
# wrapped in "{...}" and "_" used as filler -- confirm against its definition.

# Ignore lines that are empty or contain only spaces.
/^ *$/ { next }

/./ {
  # Break word into elements:
  $0 = factor_text($0)

  # Delete dummy (empty) factors, then any remaining "_" fillers.
  # Braces are written as bracket expressions, like everywhere else in this
  # script, so they can never be read as a regexp interval expression.
  gsub(/[{]_*[}]/, "")
  gsub(/_+/, "")

  # Erase a "Q" prefix (a leading "{q}" element):
  sub(/^[{]q[}]/, "")

  # Provide temporary word delimiters.  The unbalanced braces make each "#"
  # look like the tail / head of an element, so the first and last real
  # elements get a neighbour on both sides.
  $0 = "#}" $0 "{#"

  # Replace tables by "X" and gallows by "M":
  gsub(/[{][ci][ktpf][h]*[e]?[}]/, "{M}")
  gsub(/[{][ktpf][e]?[}]/, "{M}")
  gsub(/[{]ee[e]?[}]/, "{X}")
  gsub(/[{][cs]h[e]?[}]/, "{X}")

  # Map dealers and finals to "R":
  gsub(/[{]i*[dlrsvxnmjg][e]?[}]/, "{R}")

  # Map [aoy] to "o" (intentionally left disabled, as in the original):
  # gsub(/[aoy]/, "o")

  # Provide "_" for empty [aoy] elements, i.e. between adjacent braces:
  gsub(/[}][{]/, "}_{")

  # Duplicate internal non-[aoy] elements, breaking the line between the
  # two copies so that each element closes one line and opens the next:
  $0 = gensub(/[{]([^{}]*)[}]/, "{\\1}\n{\\1}", "g", $0)

  # Add outer braces (around the whole multi-line record, not each line):
  $0 = "{" $0 "}"

  # Replace braces by ":"
  gsub(/[{}]/, ":")

  print $0
}