#! /usr/bin/gawk -f
# Last edited on 2000-07-10 00:13:29 by stolfi
# Must specify -f eva2erg.gawk
# Adds a match key that can be used to find similar words.
#
#   cat INFILE \
#     | add-match-key -f eva2erg.gawk \
#         [ -v inField=IFLDNUM ] \
#         [ -v outField=OFLDNUM ] \
#         [ -v erase_ligatures=1 ] \
#         [ -v erase_plumes=1 ] \
#         [ -v ignore_gallows_eyes=1 ] \
#         [ -v join_ei=1 ] \
#         [ -v equate_aoy=1 ] \
#         [ -v collapse_ii=1 ] \
#         [ -v equate_eights=1 ] \
#         [ -v equate_pt=1 ] \
#         [ -v erase_q=1 ] \
#         [ -v erase_word_spaces=1 ] \
#         [ -v verbose=0 ] \
#     > OUTFILE
#
# This script reads from stdin one or more records that contain a
# Voynichese word KEY. It outputs the same records, each preceded by a
# "reduced" version RKEY of the same word (and one blank).
#
# The KEY word is assumed to be field IFLDNUM of each record
# (the first field by default).
#
# The RKEY is inserted as field OFLDNUM of the output record
# (the first field by default).
#
# The options define the mapping from KEY to RKEY.
# If given they are applied in the order above.
# See eva2erg.gawk for explanations of their effect.
#
BEGIN {
  # Apply defaults for every option that was not supplied with "-v".
  # An unset awk variable compares equal to the empty string.
  abort = 0;
  if (inField == "") inField = 1;
  if (outField == "") outField = 1;
  if (verbose == "") verbose = 0;

  # In verbose mode, report on stderr which recoding options are enabled.
  if (verbose) {
    printf "options:\n" > "/dev/stderr";
    if (erase_ligatures) printf " erase_ligatures\n" > "/dev/stderr";
    if (erase_plumes) printf " erase_plumes\n" > "/dev/stderr";
    if (ignore_gallows_eyes) printf " ignore_gallows_eyes\n" > "/dev/stderr";
    if (join_ei) printf " join_ei\n" > "/dev/stderr";
    if (equate_aoy) printf " equate_aoy\n" > "/dev/stderr";
    if (collapse_ii) printf " collapse_ii\n" > "/dev/stderr";
    if (equate_eights) printf " equate_eights\n" > "/dev/stderr";
    if (equate_pt) printf " equate_pt\n" > "/dev/stderr";
    if (erase_q) printf " erase_q\n" > "/dev/stderr";
    if (erase_word_spaces) printf " erase_word_spaces\n" > "/dev/stderr";
  }
}

# Main rule: handles every record that contains at least one character.
# Empty records do not match /./ and are therefore not handled here.
/./ {
  # Defensive guard: stop (with a failure status) once an error was reported.
  if (abort) exit 1;

  # The key word must be present in the record.
  if (NF < inField) { error("only " NF " input fields "); }

  # Strip comments from the key, recode it, and emit the augmented record.
  # NOTE(review): erg_erase_comments is not defined in this file; it is
  # expected to come from eva2erg.gawk, which must be loaded with "-f".
  printout(recode_text(erg_erase_comments($(inField))), outField);
  next;
}

function error(msg) {
  # Reports "msg" on stderr, prefixed with the current record number,
  # sets the global "abort" flag, and terminates the program.
  printf "line %d: %s\n", NR, msg > "/dev/stderr";
  abort = 1;
  # Exit with a non-zero status so that callers can detect the failure
  # (a bare "exit" here would report success).
  exit 1;
}

function recode_text(str) {
  # Converts a textstring with no embedded "{}"s to the
  # requested encoding.
  #
  # Each enabled option applies the corresponding erg_* mapping, in the
  # fixed order below. Word spaces are either erased or unified, and the
  # result is always passed through erg_pack.
  # NOTE(review): the erg_* functions are not defined in this file; they
  # are expected to come from eva2erg.gawk.
  if (erase_ligatures) str = erg_erase_ligatures(str);
  if (erase_plumes) str = erg_erase_plumes(str);
  if (ignore_gallows_eyes) str = erg_ignore_gallows_eyes(str);
  if (join_ei) str = erg_join_ei(str);
  if (equate_aoy) str = erg_equate_aoy(str);
  if (collapse_ii) str = erg_collapse_ii(str);
  if (equate_eights) str = erg_equate_eights(str);
  if (equate_pt) str = erg_equate_pt(str);
  if (erase_q) str = erg_erase_q(str);
  if (erase_word_spaces) {
    str = erg_erase_word_spaces(str);
  } else {
    str = erg_unify_word_spaces(str);
  }
  return erg_pack(str);
}

function printout(mw, fn, i) {
  # prints $0 with "mw" inserted as field "$(fn)"
  if (NF < fn-1) { error("not enough output fields\n"); }
  if (fn == 1) {
    print mw, $0;
  } else if (fn == NF+1) {
    print $0, mw;
  } else {
    for (i=1;i