#! /usr/bin/gawk -f
# Last edited on 1998-12-22 12:32:30 by stolfi

# Reads an EVT-format file from standard input and, for each text line,
# writes only the "best" transliteration to standard output.  Optionally
# discards #-comments.
#
# Input lines handled by this script:
#   #...                 comment; kept unless discardComments is set
#   (blank)              dropped
#   <...>                a line consisting only of a bracketed item (presumably
#                        a page/unit header); copied through unchanged
#   <LOC;CODE>  TEXT     text line; LOC has 2 or 3 "."-separated fields and
#                        CODE names the transcriber; TEXT starts at column 20
#
# Variants of the same (page/unit, line) are expected to be adjacent in the
# input.  Among them, the one whose CODE appears earliest in `trcodes` wins.
#
# Options (set with -v):
#   trcodes          26-character string listing transcriber codes from
#                    most to least preferred
#   discardComments  non-zero to drop #-comment lines

BEGIN {
  usage = ( "best-pick [-v trcodes=STRING] [-v discardComments=1] < FILE.evt > FILE.evt" );

  # abort < 0 means "no error so far"; otherwise it is the exit status.
  abort = -1;

  # State of the pending (not yet written) output line.
  opgun = "";   # page/unit part of the current locator
  oline = "";   # line part of the current locator
  otrcd = "";   # transcriber code of the best variant seen so far
  oloc = "";    # normalized locator of the best variant
  otxt = "";    # text of the best variant
  ocmts = "";   # comment lines accumulated since the last flush

  if (trcodes == "") { trcodes = "UVHZTFABENOPRSWXYKQLMJIGCD"; }
  if (discardComments == "") { discardComments = 0; }

  len = length(trcodes);
  if (len != 26) { arg_error("bad trcodes"); }

  # trpri[c] is the rank of code c; a smaller rank means more preferred.
  for (i = 1; i <= len; i++) {
    c = substr(trcodes, i, 1);
    trpri[c] = i;
  }
}

function tr_priority(code) {
  # Returns the rank of transcriber `code` (1 = most preferred).
  # A code that is not listed in `trcodes` ranks after every listed code,
  # so it can never displace a known transcriber.  The `in` test also
  # avoids creating a new element in trpri as a side effect of the lookup.
  if (code in trpri) { return trpri[code]; }
  return length(trcodes) + 1;
}

function oout() {
  # Writes the pending line (oloc, otxt), if any, followed by the
  # accumulated comments (ocmts), and clears all three.
  # The guard tests oloc rather than opgun: opgun stays set after a flush,
  # so testing it would emit an empty padded line on the next flush.
  if (oloc != "") {
    printf "%-19s%s\n", oloc, otxt;
    oloc = "";
    otxt = "";
  }
  printf "%s", ocmts;
  ocmts = "";
}

# Comment line: remember it (unless comments are being discarded); it is
# written by the next oout(), i.e. after the pending text line.
/^[#]/ {
  if (abort >= 0) { exit abort; }
  if (! discardComments) { ocmts = (ocmts $0 "\n"); }
  next;
}

# Blank line: dropped.
/^ *$/ {
  if (abort >= 0) { exit abort; }
  next;
}

# Line holding only a bracketed item: flush the pending line, then copy it.
/^<.*> *$/ {
  if (abort >= 0) { exit abort; }
  oout();
  print;
  next;
}

# Text line beginning with a locator.
# NOTE(review): the pattern of this rule and the match/substr statements
# below were reconstructed; the original text was damaged.  Confirm the
# exact locator regex against a pristine copy of the script.
/^</ {
  if (abort >= 0) { exit abort; }
  if (match($0, /^<[^<>]+>/)) {
    # Locator contents without the enclosing "<" and ">".
    tmp = substr($0, 2, index($0, ">") - 2);
    # The text field starts at a fixed column, after 19 locator columns.
    skip = 19;
    # Analyze and regularize location code:
    gsub(/[.;]/, " ", tmp);
    nf = split(tmp, locf);
    if (nf == 3) {
      pgun = locf[1];
      line = locf[2];
      trcd = locf[3];
    } else if (nf == 4) {
      pgun = (locf[1] "." locf[2]);
      line = locf[3];
      trcd = locf[4];
    } else {
      format_error("bad locator fields");
    }
  } else {
    format_error("bad locator format");
  }

  # A locator with no text after it is ignored.
  if (skip >= length($0)) next;
  txt = substr($0, 1 + skip);
  loc = sprintf("<%s.%s;%s>", pgun, line, trcd);

  # A change of page/unit or of line number starts a new group of variants.
  if (pgun != opgun) {
    oout();
    opgun = pgun;
    oline = line;
    otrcd = "";
  } else if (oline != line) {
    oout();
    oline = line;
    otrcd = "";
  }

  # Keep this variant if it is the first of its group or strictly better
  # than the best one seen so far (ties keep the earlier variant).
  if ((otrcd == "") || (tr_priority(trcd) < tr_priority(otrcd))) {
    oloc = loc;
    otxt = txt;
    otrcd = trcd;
  }
  next;
}

# Anything else is malformed.
{
  format_error("bad line format");
}

END {
  if (abort >= 0) { exit abort; }
  oout();
}

function format_error(msg) {
  # Reports a malformed input line on stderr and terminates with status 1.
  printf "file %s, line %d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  print $0 > "/dev/stderr";
  exit 1;
}

function arg_error(msg) {
  # Reports a bad command-line option on stderr, prints the usage string,
  # and terminates with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}