#! /usr/bin/gawk -f
# Last edited on 1999-12-10 18:42:16 by stolfi

# Reads a stream of words (one per input record) and prints, for each
# non-blank record, its `mantle', with the gallows letters smashed to "M".
#
# Must be called with "-f factor-text.gawk", which is expected to supply
# the factor_text() function used below (it is not defined in this file).
#
# Braces are always written as bracket expressions ([{] and [}]) so that
# they can never be parsed as interval-expression operators.

# Skip records that are empty or consist only of spaces.
/^ *$/ { next; }

# Process every record that has at least one character.
/./ {
  # Break word into elements.
  # NOTE(review): the patterns below assume factor_text() returns the
  # elements delimited by "{" and "}" -- confirm against factor-text.gawk.
  $0 = factor_text($0);

  # Delete dummy (empty) factors: brace pairs holding only "_", then any
  # remaining runs of "_".
  gsub(/[{][_]*[}]/, "", $0);
  gsub(/[_][_]*/, "", $0);

  # Map all O-elements to ".".  An O-element is matched here as an
  # unbraced run [ci]*[aoy][aoyhe]* lying between a "}" (or the start of
  # the record) and a "{" (or the end of the record); the adjacent braces
  # are kept.
  gsub(/[}][ci]*[aoy][aoyhe]*[{]/, "}.{", $0);
  gsub(/[}][ci]*[aoy][aoyhe]*$/, "}.", $0);
  gsub(/^[ci]*[aoy][aoyhe]*[{]/, ".{", $0);
  gsub(/^[ci]*[aoy][aoyhe]*$/, ".", $0);

  # Smash non-mantle, non-core elements to "-": the lone element "{s}",
  # and any braced element containing a character outside "icshektpf".
  gsub(/[{][s][}]/, "-", $0);
  gsub(/[{][^{}]*[^icshektpf{}][^{}]*[}]/, "-", $0);

  # Smash all gallows letters to "M".
  gsub(/[ktpf]/, "M", $0);

  # Delete prefixed or suffixed "-" and ".".
  gsub(/^[-.][-.]*/, "", $0);
  gsub(/[-.][-.]*$/, "", $0);

  # Collapse multiple "-" and ".": any run containing a "-" becomes a
  # single "-"; a run of two or more "." becomes a single ".".
  gsub(/[-.]*[-][-.]*/, "-", $0);
  gsub(/[.][.][.]*/, ".", $0);

  # Delete braces.  "+" rather than "*" so the pattern cannot match the
  # empty string; the resulting text is the same.
  gsub(/[{}]+/, "", $0);

  # Replace empty strings by "_" so that they are counted.
  gsub(/^ *$/, "_", $0);

  print $0;
}

# error(msg): report MSG on stderr, prefixed with the current record
# number (NR), set the global flag `abort', and exit with status 1.
# Nothing in this file calls error() or reads `abort'; presumably they
# are used by code loaded from another file -- verify before removing.
function error(msg)
{
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit 1;
}