#! /usr/bin/gawk -f
# Last edited on 2004-02-17 15:18:46 by stolfi

# Outputs a summary of the raw/good/bad token and word counts for
# text subset SMP as a TeX macro definition file.  Assumes that
# the input records have 11 fields
#
#   SEC RAWTK GUDTK GUDTKPPM BADTK BADTKPPM RAWWD GUDWD GUDWDPPM BADWD BADWDPPM
#
# where SEC is a section tag, RAWTK = GUDTK + BADTK,
# GUDTKPPM = 1000*GUDTK/RAWTK, BADTKPPM = 1000*BADTK/RAWTK,
# and ditto for words.  Uses only the entry with SEC = TOTSEC
# (default "tot.1").  Lines that are blank or whose first non-blank
# character is "#" or "/" are skipped.
#
# For each of the token ("Tks") and word ("Wds") groups, the output
# defines the macros \<smp>Raw<G>, \<smp>Gud<G>, \<smp>Gud<G>Pct,
# \<smp>Bad<G> and \<smp>Bad<G>Pct, where the "Pct" values are the
# per-thousand input fields divided by 10 and printed with one decimal.

BEGIN {
  # "abort" stays negative until an error handler sets the exit status.
  abort = -1;
  usage = ( "tex-format-raw-gud-bad-summary \\\n" \
    " -v smp=STRING [ -v totSection=TOTSEC ] \\\n" \
    " < INFILE.wct > OUTFILE.tex" \
  );

  if (smp == "") { arg_error("must define \"smp\""); }
  if (totSection == "") { totSection = "tot.1"; }

  # Macro-name suffixes for the two field groups: tokens, then words.
  split("", tw);
  tw[0] = "Tks";
  tw[1] = "Wds";

  # "%%" yields a literal "%", which starts a TeX comment line.
  printf "%% Created by tex-format-raw-gud-bad-summary\n";
  found_tot = 0;
}

# Defensive: stop at once if an error was already flagged.
(abort >= 0) { exit abort; }

# Skip blank lines and lines starting (after blanks) with "#" or "/".
/^ *([#/]|$)/ { next; }

/./ {
  if (NF != 11) { data_error("bad input format"); }
  if ($1 != totSection) { next; }
  found_tot = 1;
  for (k = 0; k < 2; k++) {
    # Token fields are $2..$6, word fields are $7..$11.
    base = 5*k + 2;
    printf "%%\n";
    pdef_i(("Raw" tw[k]), $(base));
    pdef_i(("Gud" tw[k]), $(base + 1));
    pdef_r(("Gud" tw[k] "Pct"), $(base + 2)/10);
    pdef_i(("Bad" tw[k]), $(base + 3));
    pdef_r(("Bad" tw[k] "Pct"), $(base + 4)/10);
  }
}

END {
  # "exit" in BEGIN or in a main rule still runs END; do not pile a
  # bogus "could not find section" message on top of an earlier error.
  if (abort >= 0) { exit abort; }
  if (! found_tot) {
    data_error(("could not find section \"" totSection "\""));
  }
}

# Defines macro \<smp><var> with the value {val} written verbatim.
function pdef_i(var, val)
{
  pdef(var, val);
}

# Defines macro \<smp><var> with {val} formatted to one decimal place.
function pdef_r(var, val)
{
  pdef(var, sprintf("%.1f", val));
}

# Writes the TeX definition "\def\<smp><var>{<val>}" to standard output.
function pdef(var, val)
{
  printf "\\def\\%s%s{%s}\n", smp, var, val;
}

# Reports a command-line error plus the usage text on stderr,
# then terminates with exit status 1.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

# Reports an input-data error, tagged with the current record
# number (FNR), on stderr, then terminates with exit status 1.
function data_error(msg)
{
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}