#! /usr/bin/gawk -f
# Last edited on 2004-11-19 02:02:19 by stolfi

# Reads an EVMT file from stdin, maps the locators through a table,
# and writes the result to stdout.
#
# Each line of the {table} file should have two words "{OLD} {NEW}"
# meaning that the old locator {OLD} should be mapped to {NEW}.
# The locators in this table should not include the "<>" delimiters
# nor the transcriber codes.  Blank lines and lines starting with "#"
# are ignored in the table, and so is a trailing field that starts
# with "#".  If {inverse} is true the two columns are swapped, so the
# table maps {NEW} back to {OLD}.
#
# Whenever an input location code is not found in the table,
# it is left alone.  A message is printed if {pedantic} is true.

BEGIN {
  usage = ( \
    ARGV[0] "\\\n" \
    " -v table=TBLFILE [ -v inverse=BOOL ] \\\n" \
    " [ -v pedantic=BOOL ] \\\n" \
    " < INFILE > OUTFILE " \
  );

  # {abort} stays negative while everything is fine; the error
  # functions set it to the exit status before calling {exit}.
  abort = -1;

  if (table == "") { arg_error("must specify \"-v table=FILE\"\n"); }
  if (inverse == "") { inverse = 0; }
  if (pedantic == "") { pedantic = 0; }

  # {dic} is the global locator map, indexed by the old locator.
  split("", dic);
  read_table(table, inverse, dic);
}

# Comment lines and blank lines are copied through unchanged.
/^[#]/ { print; next; }

/^ *$/ { print; next; }

function read_table(fname,inv,tbl,   ntbl,nlin,lin,fld,nfld,tmp,status)
{
  # Loads the two-column file {fname} into the array {tbl}, so that
  # {tbl[OLD] = NEW} for every entry (or {tbl[NEW] = OLD} when {inv}
  # is true).  Aborts the program through {tbl_error} on a malformed
  # entry, a repeated key, or a read error, and through {arg_error}
  # when no line at all could be read.

  ntbl = 0;
  nlin = 0;
  while ((status = (getline lin < fname)) > 0)
    { nlin++;
      # Skip blank lines and full-line comments:
      if (! match(lin, /^[ \011]*([#]|$)/))
        { nfld = split(lin, fld, " ");
          # A third field that starts with "#" is a trailing comment:
          if ((nfld >= 3) && (fld[3] ~ /^[#]/)) { nfld = 2; }
          if (nfld != 2)
            { tbl_error(fname, nlin, ("bad table entry = \"" lin "\"")); }
          # If {inv} is true, swap the two columns:
          if (inv) { tmp = fld[1]; fld[1] = fld[2]; fld[2] = tmp; }
          # Check against the array being filled, not the global {dic}:
          if (fld[1] in tbl)
            { tbl_error(fname, nlin, ("repeated key = \"" lin "\"")); }
          tbl[fld[1]] = fld[2];
          ntbl++;
        }
    }
  # {getline} returns a negative value on failure and 0 at end of file.
  # Testing that is safer than inspecting {ERRNO}, whose "no error"
  # value is not the string "0" on current gawk.
  if (status < 0) { tbl_error(fname, nlin, ERRNO); }
  close (fname);
  if (nlin == 0)
    { arg_error(("file \"" fname "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", ntbl > "/dev/stderr"
}

(abort >= 0) { exit abort; }

# NOTE(review): the text between the start of this pattern and the tail
# of the {match} regex inside the mapping function was lost from the
# file.  The rule below, the function name and header, and the head of
# that regex are a minimal reconstruction from the surviving fragments;
# confirm them against a pristine copy of the script.
/^(|[#][#] *)</ {
  print map_locator($0);
  next;
}

# NOTE(review): the original handling of lines that match none of the
# rules above is unknown; they are copied through unchanged here so
# that no input is silently dropped.
{ print; }

function map_locator(str,   head,loc,trc,tail,dlen)
{
  # Returns {str} with its first "<LOC>" or "<LOC;T>" locator replaced
  # by the mapped locator from {dic}.  The transcriber code ";T", if
  # present, is preserved.  Blanks right after the ">" are removed or
  # added to compensate for the change in locator length.

  if (! match(str, /[<]([^<>; ]*)(|[;][A-Z])[>]/))
    { format_error("bad location code format"); }

  # {head} ends with the "<", {tail} begins with the ">":
  head = substr(str,1,RSTART);
  loc = substr(str,RSTART+1,RLENGTH-2);
  tail = substr(str,RSTART+RLENGTH-1);

  # Split off the transcriber code, if any:
  if (match(loc, /[;]/))
    { trc = substr(loc,RSTART); loc = substr(loc,1,RSTART-1); }
  else
    { trc = ""; }

  dlen = length(loc);
  if (loc in dic)
    { loc = dic[loc]; }
  else if (pedantic)
    { printf "file %s, line %d: locator not found: %s\n", FILENAME, FNR, loc > "/dev/stderr"; }

  # {dlen} is now the growth of the locator (negative if it shrank).
  dlen = length(loc) - dlen;
  # Longer locator: eat one blank after the ">" per extra character,
  # as long as there are blanks to eat.
  while (dlen > 0) { sub(/^> /, ">", tail); dlen--; }
  # Shorter locator: insert one blank after the ">" per lost character.
  while (dlen < 0) { sub(/^>/, "> ", tail); dlen++; }

  return (head loc trc tail);
}

function format_error(msg)
{
  # Reports a malformed input line and aborts with status 1.
  printf "file %s, line %d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(fname,nlin,msg)
{
  # Reports a problem at line {nlin} of table file {fname} and aborts
  # with status 1.
  printf "file %s, line %d: %s\n", fname, nlin, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function arg_error(msg)
{
  # Reports a command-line problem, shows the usage, and aborts with
  # status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}