#! /usr/bin/gawk -f
# Last edited on 2000-01-23 15:00:48 by stolfi

# Must be called with "-f factor-text.gawk"; this script calls
# factor_text(), which is not defined here.
#
# Each non-blank input line is treated as one word.  The word is split
# into brace-delimited elements by factor_text(), and the elements are
# then replaced by class symbols:
#
#   M  gallows  ([ktpf], optionally wrapped as [ci][ktpf]h*)
#   X  tables   (ee, ch, sh)
#   D  dealers  ([dlrsvx])
#   F  finals   ([nmjg])
#
# A leading "q" element is erased.  A trailing "e" is folded into the
# preceding M/X/D element, and leading "i"s are folded into the
# following D/F element.  Elements not matched by any rule (e.g. the
# "circles" [aoy]) pass through unchanged.
#
# Output: a word containing no M or X is printed as is, on one line.
# Otherwise it is printed as three lines, "prefix«", "«core»" and
# "»suffix", where the core runs from the first to the last M/X.
#
# NOTE(review): an earlier version of this header said that dealers and
# finals share the symbol "R" and that [aoy] are mapped to "o".  The
# rules below emit "D" and "F" and never rewrite [aoy] (unless
# factor_text() does so -- not visible from this file).  Confirm which
# behaviour downstream consumers expect.

# Skip empty or all-blank lines.
/^ *$/ { next; }

/./ {
  # Break word into elements.
  # Presumably each element comes back wrapped in "{...}", with "_" as
  # filler -- inferred from the clean-up steps that follow; verify
  # against factor-text.gawk.
  $0 = factor_text($0);

  # Delete dummy (empty) factors, then any leftover filler.
  # Braces are written as [{] / [}] so they can never be parsed as an
  # interval expression.
  gsub(/[{][_]*[}]/, "", $0);
  gsub(/[_][_]*/, "", $0);

  # Erase "Q" prefixes.  This must run before the temporary delimiters
  # are added, because it is anchored at the start of the word.
  gsub(/^[{][q][}]/, "", $0);

  # Provide temporary word delimiters:
  gsub(/^/, "}", $0);
  gsub(/$/, "{", $0);

  # Map gallows to "M" (with an optional trailing "e" folded in):
  gsub(/[{][ci][ktpf][h]*[e]?[}]/, "{M}", $0);
  gsub(/[{][ktpf][e]?[}]/, "{M}", $0);

  # Map tables to "X":
  gsub(/[{]ee[e]?[}]/, "{X}", $0);
  gsub(/[{][cs]h[e]?[}]/, "{X}", $0);

  # Map dealers to "D" (leading "i"s and a trailing "e" folded in):
  gsub(/[{]i*[dlrsvx][e]?[}]/, "{D}", $0);

  # Map finals to "F" (leading "i"s folded in):
  gsub(/[{]i*[nmjg][}]/, "{F}", $0);

  # Erase braces (this also removes the temporary delimiters):
  gsub(/[{}]/, "", $0);

  # Separate prefix/coremantle/suffix if present: put "«" before the
  # first X/M and "»" after the last one.
  $0 = gensub(/^([^XM]*)([XM])/, "\\1«\\2", "g", $0);
  $0 = gensub(/([XM])([^XM]*)$/, "\\1»\\2", "g", $0);

  # Split at the markers, repeating each marker on both sides of the
  # break.  The guillemets are matched as plain literals rather than
  # inside a bracket expression: a multibyte character in [...] is
  # matched byte by byte when gawk runs in a single-byte locale (or
  # with -b), which would corrupt the output.
  gsub(/«/, "«\n«", $0);
  gsub(/»/, "»\n»", $0);

  print $0;
}