#! /bin/gawk -f
# Last edited on 2002-01-15 05:16:47 by stolfi

# select-evt-lines
#
# Reads an EVMT-formatted file from standard input and copies to
# standard output a subset of its data lines that is adequate for
# statistical analysis.  Comment lines (starting with "#") and blank
# lines are always copied through unchanged.
#
# Variables expected on the command line (via "-v"):
#   sample    name of the sample; required; used only in messages.
#   subsec    subsection identifier; required; passed to the predicate.
#   maxLines  optional; output is truncated after this many data lines
#             (comments are not counted).
#
# The package SAMPLEFNS.gawk (loaded with an extra "-f") must define
# the predicate
#
#   select_evt_line(subsec,chapter,unit,linenum)
#
# which returns true for the data lines that should be kept.
#
# Exit status is 0 on success and 1 on argument or data errors.

BEGIN {
  # "abort" stays negative until an error sets it to the exit status.
  abort = -1;
  usage = ( \
    "cat INFILE.evt \\\n" \
    " | select-evt-lines \\\n" \
    " -f SAMPLEFNS.gawk \\\n" \
    " -v sample=SAMPLE -v subsec=SEC.K \\\n" \
    " -v maxLines=NUM \\\n" \
    " > OUTFILE.evt" \
  );

  if (sample == "") { arg_error("must define \"sample\""); }
  if (subsec == "") { arg_error("must define \"subsec\""); }
  # No limit given: use a bound large enough to act as "unlimited".
  if (maxLines == "") { maxLines = 999999999; }

  nread = 0;   # Number of data lines read
  nwrite = 0;  # Number of data lines written

  printf "# SELECTED SUBSET\n";
  printf "# Extracted by select-evt-lines (sample = %s subsec = %s)\n", sample, subsec;
}

# Once an error has been flagged, stop processing input.
(abort >= 0) { exit abort; }

# Comments and blank lines are copied verbatim and never counted.
/^[ ]*([#]|$)/ { print; next; }

# Quota reached: the next non-comment line triggers the truncation
# notice.  The END rule still runs and reports the statistics.
(nwrite >= maxLines) {
  printf "# TRUNCATED AFTER %d DATA LINES\n", maxLines;
  exit 0;
}

# Data lines: "<CHAPTER.UNIT.LINENUM>" or "<CHAPTER.UNIT.LINENUM;VERSION>"
# followed by the text.
/^[<]/ {
  nread++;

  # Parse the line locator.  The ";VERSION" suffix is optional.
  if (! match($0, /^[<][a-zA-Z0-9]+[.][A-Za-z0-9]+[.][A-Za-z0-9]+([;][A-Za-z0-9]+)?[>]/)) {
    data_error(("bad line locator format \"" substr($0,1,19) "\""));
  }
  loc = substr($0, 1, RLENGTH);
  # "lin" and "version" are not used in this file; they are kept as
  # globals in case select_evt_line() reads them -- TODO confirm.
  lin = substr($0, RLENGTH + 1);
  gsub(/[ <>]/, "", loc);

  # Split off the version code, defaulting to "A" when absent.
  if (match(loc, /[;][A-Za-z0-9]+$/)) {
    version = substr(loc, RSTART+1, RLENGTH-1);
    loc = substr(loc, 1, RSTART-1);
  } else {
    version = "A";
  }

  # Break the remaining "CHAPTER.UNIT.LINENUM" into its three fields.
  gsub(/[.]/, " ", loc);
  nf = split(loc, locf);
  if (nf != 3) {
    data_error(("bad number of fields in locator \"" loc "\""));
  }
  chapter = locf[1];
  unit = locf[2];
  linenum = locf[3];

  # Decide if line should be saved:
  if (select_evt_line(subsec, chapter, unit, linenum)) {
    nwrite++;
    print;
  }
  next;
}

# Anything that is neither a comment, a blank line, nor a data line.
// { data_error("neither text nor comment"); }

END {
  # Propagate the error status without printing statistics.
  if (abort >= 0) { exit abort; }
  printf "%s: %7d data lines read, %7d written\n", sample, nread, nwrite > "/dev/stderr";
}

# Reports a command-line error plus the usage text on stderr and
# terminates with status 1.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

# Reports an input-data error, tagged with the current line number
# (FNR), on stderr and terminates with status 1.
function data_error(msg)
{
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}