#! /usr/bin/gawk -f
# Last edited on 1999-01-05 01:34:36 by stolfi

# Removes redundant entries from the raw concordance (7 fields).  An
# entry is redundant if it has the same location, position, length,
# string, and context of a previous entry, differing only on the
# transcriber code.
#
# Input:  non-empty records with exactly 7 fields.  Field 1 is the
#         location code, field 2 the transcriber code; fields 3-7 are
#         compared as a unit.  Redundant records are only detected when
#         they are adjacent in the input.
# Output: one record per distinct (location, fields 3-7) group, with
#         field 2 replaced by the concatenation of the transcriber codes
#         of all records in the group.
# Stderr: one line per page ("records-read records-written f-number"),
#         followed by the overall totals.

BEGIN {
  usage = ( "remove-redundant-roc-entries < INFILE > OUTFILE" );

  # Exit status requested by error(); negative means "no error so far".
  abort = -1;

  # Data for previous entry:
  #   "oloc"  is the location code (field 1).
  #   "olin"  is the concatenation of fields 3-7.
  #   "otrc"  is the transcriber code (field 2) of the first record
  #           of the entry.
  #   "otrcs" is the cat of field 2 for all records with same "oloc", "olin".
  #   "ofnum" is the page's f-number extracted from "oloc".

  # Overall counters: records read, written, and skipped as redundant.
  nrd = 0; nwr = 0; nsk = 0;

  start_entry("", "", "");
  start_page("");
}

# An error was flagged earlier: stop processing further records.
(abort >= 0) { exit abort; }

# Main rule: every non-empty input record.
/./ {
  if (NF != 7) { error("wrong number of fields"); }

  loc = $1;
  trc = $2;
  lin = ($3 " " $4 " " $5 " " $6 " " $7);

  if ((loc != oloc) || (lin != olin)) {
    # A new entry begins: flush the previous one and remember this one.
    finish_entry();
    start_entry(loc, trc, lin);

    # The page f-number is the part of the locator before the first dot.
    if (! match(loc, /[.]/)) { error("no dot in locator"); }
    fnum = substr(loc, 1, RSTART-1);

    # Switch pages BEFORE counting the entry, so that the first entry of
    # a page is credited to that page and not to the one just finished
    # (start_page() resets the per-page counters).
    if (fnum != ofnum) { finish_page(); start_page(fnum); }

    nwr++; pgwr++;
  } else {
    # Same entry by another transcriber: merge its code, drop the record.
    # NOTE(review): "otrc" is not updated here, so this only checks each
    # code against the FIRST code of the entry, not against the
    # immediately preceding one -- confirm whether that is intended.
    if (trc < otrc) { error("not sorted by transcriber"); }
    otrcs = (otrcs trc);
    nsk++;
  }

  nrd++; pgrd++;
  next;
}

END {
  # error() exits through here; propagate its status without flushing.
  if (abort >= 0) { exit abort; }

  finish_entry();
  finish_page();

  printf "\n" > "/dev/stderr";
  printf "%7d records read\n", nrd > "/dev/stderr";
  printf "%7d records ignored\n", nsk > "/dev/stderr";
  printf "%7d records written\n", nwr > "/dev/stderr";
}

function start_entry(loc, trc, lin) {
  # Called when a new entry is starting.  Records its location,
  # transcriber code and fields 3-7 as the "previous entry" data.
  oloc = loc;
  otrc = trc;
  otrcs = trc;
  olin = lin;
}

function finish_entry() {
  # Called when current entry is complete.  Writes it to standard
  # output, unless no entry has been started yet ("oloc" is empty).
  if (oloc != "") { print oloc, otrcs, olin; }
}

function start_page(fnum) {
  # Called when starting new page.  Resets the per-page counters of
  # records read ("pgrd") and written ("pgwr").
  pgrd = 0;
  pgwr = 0;
  ofnum = fnum;
}

function finish_page() {
  # Called when current page is complete.  Reports the per-page counts
  # to stderr, unless no page has been started yet ("ofnum" is empty).
  if (ofnum != "") {
    printf "%4d %4d %s\n", pgrd, pgwr, ofnum > "/dev/stderr";
  }
}

function error(msg) {
  # Reports "msg" with the current file name, line number and record to
  # stderr, then terminates with status 1.  The "exit" transfers control
  # to the END rule, which sees "abort" set and exits at once.
  printf "file %s, line %d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  print $0 > "/dev/stderr";
  abort = 1;
  exit abort;
}