#! /usr/bin/gawk -f
# Last edited on 1999-12-11 03:04:05 by stolfi

# Reads a stream of words, extracts their `crusts'.
#
# Input:  one word per line; lines that are empty or contain only
#         spaces are skipped.
# Output: one line per remaining input line, holding what is left of the
#         word after the reductions below: the letters of the surviving
#         (crust) elements, with each run of mantle/core elements
#         replaced by a single "-".  If nothing is left, "()" is
#         printed instead so that the output never has an empty line.
#
# NOTE(review): factor_text() is not defined in this file; it must be
# supplied by another "-f" source.  From the way its result is used
# here, it presumably returns the word with each element wrapped in
# braces ("{...}") and "_" used as filler -- confirm against its
# definition.

# Skip blank lines.
/^ *$/ { next; }

/./ {
  # Break word into elements:
  $0 = factor_text($0);

  # Delete dummy (empty) factors.  The braces are written as bracket
  # expressions, like everywhere else in this file, so that a leading
  # "{" can never be taken for the start of an interval expression.
  gsub(/[{][_]*[}]/, "", $0);
  gsub(/[_][_]*/, "", $0);

  # Smash all O-elements to ".".  Four cases: between two braced
  # elements, at the end of the word, at the start of the word, and
  # the whole word.
  gsub(/[}][ci]*[aoy][aoyhe]*[{]/, "}.{", $0);
  gsub(/[}][ci]*[aoy][aoyhe]*$/, "}.", $0);
  gsub(/^[ci]*[aoy][aoyhe]*[{]/, ".{", $0);
  gsub(/^[ci]*[aoy][aoyhe]*$/, ".", $0);

  # Erase the "q" prefix, if any:
  gsub(/^[{][q][}]/, "", $0);

  # Differentiate "sh" and "se" from "s" alone.  The capital "S" lets
  # the next step smash "{Sh...}" / "{Se...}" while leaving a plain
  # "{s}" untouched.
  gsub(/[s][h]/, "Sh", $0);
  gsub(/[s][e]/, "Se", $0);

  # Smash mantle and core elements to "-":
  gsub(/[{][icShektpf]*[}]/, "-", $0);

  # Erase "i" modifiers from crust elements:
  gsub(/[{][i]*/, "{", $0);

  # Delete prefixed or suffixed ".":
  gsub(/^[.][.]*/, "", $0);
  gsub(/[.][.]*$/, "", $0);

  # In fact, delete all "." (for now).  While this line is present the
  # two substitutions just above, and the "." collapse below, have
  # nothing left to act on; they are kept so that removing this single
  # line restores the "." markers.
  gsub(/[.]/, "", $0);

  # Collapse multiple "-" and ".":
  gsub(/[-.]*[-][-.]*/, "-", $0);
  gsub(/[.][.][.]*/, ".", $0);

  # Delete braces.  A single-character pattern is enough for gsub and
  # avoids matching the empty string at every position.
  gsub(/[{}]/, "", $0);

  # Protect empty strings
  gsub(/^ *$/, "()", $0);

  print $0;
}

# Reports a fatal error and terminates the script.
#   msg  - text of the message; it is written to standard error,
#          prefixed with the current input record number (NR).
# Sets the global "abort" flag before exiting with status 1.
# NOTE(review): no END rule is visible in this file; "abort" is
# presumably tested by an END rule elsewhere -- confirm.
function error(msg)
{
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit 1;
}