#! /usr/bin/gawk -f
# Last edited on 2000-07-10 00:03:21 by stolfi

# Splits a multipage EVT-format file into one file per page,
# discarding #-comments and keeping, for each text line, only the
# version whose transcriber code equals "trcode" (default "A"; the
# original header calls this the majority-vote version).
#
# Output files are named "OUTDIR/FNUM.evt".  The page names (FNUM),
# one per line, are also written to stdout in the order in which the
# pages are first encountered.
#
# Input lines are expected to start with a locator of the form
# "<FNUM.UNIT.LINE;TRCODE>" (the ";TRCODE" part is optional), with the
# text starting at column 20.  Locators without any "." (e.g. "<f1r>",
# presumably page headers) are skipped.

BEGIN {
  # "abort" is the pending exit status; negative means "no error".
  abort = -1;
  usage = "split-pages [-v outdir=DIR] [-v trcode=LETTER] < FILE > PAGELIST";

  if (trcode == "") { trcode = "A"; }
  if (! match(trcode, /^[A-Z]$/)) { error("bad trcode"); }

  # State of the text line currently being collected:
  ofnum = "";  # Page of the current line.
  otrcd = "";  # Transcriber code of the saved version ("" if none yet).
  ofile = "";  # Name of the output file for the current page.
  ounln = "";  # "UNIT.LINE" of the current line ("" if no current line).

  if (outdir == "") { outdir = "."; }
}

# Stop processing input once an error has been flagged.
(abort >= 0) { exit abort; }

# Discard #-comments.
/^#/ { next; }

# Discard blank lines.
/^ *$/ { next; }

# Discard lines whose locator contains no "." and no blanks.
/^[<][^<>. ]*[>]/ { next; }

# Text line with a full locator.
/^[<][f][0-9][0-9]*[vr][0-9]*\.[A-Za-z][A-Za-z0-9]*\..*[>]/ {
  tmp = substr($0, 2, index($0, ">") - 2);
  skip = 19;  # Width of the locator field; the text starts right after it.

  # Analyze and regularize location code:
  gsub(/[.;]/, " ", tmp);
  nlocf = split(tmp, locf);
  if ((nlocf < 3) || (nlocf > 4)) { error("bad location format"); }
  fnum = locf[1];
  unit = locf[2];
  line = locf[3];
  if (nlocf >= 4) {
    trcd = locf[4];
  } else {
    trcd = "X";
  }

  # Lines with no text after the locator field are ignored entirely.
  if (skip >= length($0)) { next; }

  txt = substr($0, 1 + skip);
  loc = sprintf("<%s.%s.%s;%s>", fnum, unit, line, trcd);
  unln = (unit "." line);

  if (fnum != ofnum) {
    # New page: flush the pending line, then switch output files.
    oout();
    if (ofile != "") { close(ofile); }
    printf "%s\n", fnum;
    ofile = (outdir "/" fnum ".evt");
    ofnum = fnum;
    ounln = unln;
  } else if (ounln != unln) {
    # Same page, new line: flush the pending line.
    oout();
    ounln = unln;
  }

  # Remember this version if it is the selected one.
  if (trcd == trcode) {
    oloc = loc;
    otxt = txt;
    otrcd = trcd;
  }
  next;
}

# Anything else is malformed.
/./ { error("bad location code"); }

END {
  if (abort >= 0) { exit abort; }
  oout();
  if (ofile != "") { close(ofile); }
}

function oout() {
  # Writes the current line "(oloc, otxt)" to file "ofile"
  # and clears it.  Does nothing if there is no current line.
  #
  # Must be called BEFORE "ofnum" and "ounln" are updated, since they
  # are used to identify the line in the error message below.
  if (ounln != "") {
    # We do have a current line.
    if (otrcd == "") {
      # No version with the selected transcriber code was seen, so
      # "oloc" is empty here; build the locator from the line state.
      error(("missing majority version of " \
             sprintf("<%s.%s;%s>", ofnum, ounln, trcode)));
    }
    if (ofile == "") { error(("file not open???")); }
    # NOTE(review): ">>" appends, so files left over from a previous
    # run are extended rather than overwritten -- confirm intended.
    printf "%-19s%s\n", oloc, otxt >> ofile;
    oloc = "";
    otxt = "";
    otrcd = "";
  }
}

function error(msg) {
  # Prints "msg" (prefixed with the current input record number) and
  # the usage string to stderr, then exits with status 1.
  printf "line %d: %s\n", NR, msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}