#! /n/gnu/bin/gawk -f
# Last edited on 2000-02-01 22:24:27 by stolfi

# Reads a stream of element-factored words (one word per input line, with
# each element written between braces, e.g. "{q}{o}{k}{e}{e}{d}{y}") and
# writes each word, with the braces removed, prefixed by its word type
# and a "-".
#
# Element classes used by the patterns below (after "ch"/"sh" have been
# folded to "C"/"S"):
#   core   : any of  k t p f
#   mantle : any of  C S c e h
#   crust  : everything else
#
# The word type is built from these letters, in this order:
#   p  non-empty crust prefix
#   m  non-empty mantle prefix   (only for words that have a core)
#   c  core
#   n  non-empty mantle suffix   (only for words that have a core)
#   s  non-empty crust suffix
# A word with a mantle but no core is tagged [p]mn[s]; a crust-only
# word is always tagged "ps".

# Skip lines that are empty or contain only spaces.
/^ *$/ { next; }

# Every other line is one word: label it and print it.
/./ { print label_word($0); }

# Returns the word `w` with braces and dummy factors removed and the
# word-type label prepended.  Calls error() (which terminates the
# program) if a word that is not crust-only lacks the expected
# "{...}" structure.  The parameters after `w` are local variables.
function label_word(w,    plen, pmlen, slen, nslen, label)
{
  # Delete dummy (empty) factors, then any stray "_" fillers.
  # Braces are written as bracket expressions so that they can never be
  # read as interval operators.
  gsub(/[{][_]*[}]/, "", w);
  gsub(/[_][_]*/, "", w);

  # Map "ch" and "sh" to "C" and "S" so that each is a single character
  # for the character-class tests below:
  gsub(/ch/, "C", w);
  gsub(/sh/, "S", w);

  # Label and split components: crust/mantle prefixes/suffixes, core.
  if (w ~ /^[^CSktpfech]*$/)
    { # Crust-only word (this includes a word that became empty above):
      label = "ps";
    }
  else if (w ~ /^[^ktpf]*$/)
    { # Empty core, nonempty mantle.
      # Must have at least one non-crust element inside braces, and the
      # word must end with a closing brace:
      if (w !~ /[{].*[CSktpfech].*[}]$/) { error(("bad format = «" w "»")); }

      # Measure crust prefix and suffix.  Each pattern also matches one
      # delimiting brace, hence the "-1".  When there is no match
      # RLENGTH is -1, which also yields a non-positive length.
      match(w, /^([^CSktpfech]*)[{]/); plen = RLENGTH-1;
      match(w, /[}]([^CSktpfech]*)$/); slen = RLENGTH-1;

      label = ( (plen > 0 ? "p" : "") "mn" (slen > 0 ? "s" : "") );
    }
  else
    { # Non-empty core.
      # Must have at least one core element inside braces, and the word
      # must end with a closing brace:
      if (w !~ /[{].*[ktpf].*[}]$/) { error(("bad format = «" w "»")); }

      # Measure crust+mantle prefix, crust prefix, mantle+crust suffix,
      # and crust suffix (each minus the one delimiting brace):
      match(w, /^([^ktpf]*)[{]/);       pmlen = RLENGTH-1;
      match(w, /^([^CSktpfech]*)[{]/);  plen  = RLENGTH-1;
      match(w, /[}]([^ktpf]*)$/);       nslen = RLENGTH-1;
      match(w, /[}]([^CSktpfech]*)$/);  slen  = RLENGTH-1;

      # The mantle prefix/suffix is non-empty exactly when the combined
      # stretch is longer than the crust-only stretch.
      label = "";
      if (plen > 0)      { label = ( label "p" ); }
      if (pmlen > plen)  { label = ( label "m" ); }
      label = ( label "c" );
      if (nslen > slen)  { label = ( label "n" ); }
      if (slen > 0)      { label = ( label "s" ); }
    }

  # Delete braces:
  gsub(/[{}]/, "", w);

  # Restore "ch" and "sh".  This is applied to the word only; the label
  # contains no upper-case letters, so prepending it afterwards gives
  # the same result as substituting in the labelled string.
  gsub(/C/, "ch", w);
  gsub(/S/, "sh", w);

  return ( label "-" w );
}

# Reports `msg` on standard error, prefixed by the current input line
# number, and terminates the program with exit status 1.
function error(msg)
{
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  # NOTE(review): `abort` is not read anywhere in the visible script;
  # presumably a flag for an END rule -- kept in case one exists.
  abort = 1;
  exit 1;
}