#! /usr/bin/gawk -f
# Last edited on 2004-02-17 15:19:37 by stolfi

BEGIN {
  # Exit status requested by an error routine; negative means "no error".
  abort = -1;
  usage = ( \
    "tex-format-raw-gud-bad-counts \\\n" \
    " < INFILE.wct > OUTFILE.tex" \
  );

  # Tabulates the counts of raw, good, and bad tokens/words by section,
  # as a LaTeX table.  Assumes the input records have 11 fields
  #
  #   SEC RAWTK GUDTK GUDTKPPM BADTK BADTKPPM RAWWD GUDWD GUDWDPPM BADWD BADWDPPM
  #
  # where RAWTK = GUDTK + BADTK, GUDTKPPM = 1000*GUDTK/RAWTK,
  # BADTKPPM = 1000*BADTK/RAWTK, and ditto for words.  If SEC = "/" the
  # rest of the line is ignored, and a break is inserted in the table.
  #
  # The four *PPM fields (4, 6, 9, 11) are never read by this script;
  # the percentages shown in the table are recomputed from the counts.

  ns = 0; # Number of sections kept so far (including "/"s).

  # These arrays are indexed by s = [1..ns].  The split("", x) calls
  # just make sure each one starts out as an empty array.
  split("", stag);  # Section tag, or "/" for a table break.
  split("", traw);  # Token counts: raw,
  split("", tgud);  #   accepted,
  split("", tbad);  #   discarded.
  split("", wraw);  # Word counts: raw,
  split("", wgud);  #   accepted,
  split("", wbad);  #   discarded.
}

# Once an error has been flagged, stop processing input.
(abort >= 0) { exit abort; }

# Skip blank lines and "#" comment lines.
/^ *([#]|$)/ { next; }

# Data line: either a "/" break marker or an 11-field record.
/./ {
  # Tentative slot; it is only committed (by ns++) if the entry is kept,
  # so a rejected entry is simply overwritten by the next line.
  i = ns+1;
  stag[i] = $1;
  # Take care to eliminate zero entries, and any double "/"s that may result.
  if (stag[i] == "/")
    { # Keep a break only if it is the first entry or follows a section.
      if ((ns == 0) || (stag[ns] != "/")) { ns++; }
    }
  else
    { if (NF != 11) { data_error(("bad line format = |" $0 "|")); }
      traw[i] = $2+0;
      tgud[i] = $3+0;
      tbad[i] = $5+0;
      wraw[i] = $7+0;
      wgud[i] = $8+0;
      wbad[i] = $10+0;
      # Consistency checks: raw must equal accepted plus discarded.
      if (traw[i] != tgud[i] + tbad[i]) { data_error(("toks error = |" $0 "|")); }
      if (wraw[i] != wgud[i] + wbad[i]) { data_error(("wrds error = |" $0 "|")); }
      # Sections with no raw tokens are left out of the table.
      if (traw[i] > 0) { ns++; }
    }
  next;
}

END {
  # "exit" inside a rule still runs END, so propagate the error status here.
  if (abort >= 0) { exit abort; }
  # Ignore final "/" if any
  if ((ns > 0) && (stag[ns] == "/")) { ns--; }
  print_table();
}

function print_table(   i,s)
{
  # Writes the complete LaTeX "tabular" environment to standard output:
  # a two-row header, then one row per section in stag[1..ns], with an
  # "\hline" wherever the tag is "/".

  printf "%% Created by %s\n", ARGV[0];

  # Table header:
  printf "\\begin{tabular}{|l|r|rr|rr||r|rr|rr|} \\hline\n";

  printf " ";
  printf " &";
  printf " \\multicolumn{5}{|c||}{{\\rm Tokens}}";
  printf " & ";
  printf " \\multicolumn{5}{c|}{{\\rm Words}}";
  printf " \\\\ \\hline\n";

  printf " {\\rm Sec}";
  # Same three sub-headings twice: once for tokens, once for words.
  for (i = 1; i <= 2; i++)
    { printf " &";
      printf " \\multicolumn{1}{|r|}{{\\rm Total}}";
      printf " & ";
      printf " \\multicolumn{2}{r|}{{\\rm Accepted}}";
      printf " & ";
      printf " \\multicolumn{2}{r|}{{\\rm Discarded}}";
    }
  printf " \\\\ \\hline\n";

  # Table entries:
  for (s = 1; s <= ns; s++)
    { if (stag[s] == "/")
        { printf " \\hline\n"; }
      else
        { printf " {\\tt %s}", stag[s];
          print_counts(traw[s], tgud[s], tbad[s]);
          print_counts(wraw[s], wgud[s], wbad[s]);
          printf " \\\\\n";
        }
    }
  printf " \\hline\n";
  printf "\\end{tabular}%%\n";
}

function print_counts(ctraw,ctgud,ctbad,   xctraw,xctgud,xfrgud,xctbad,xfrbad)
{
  # Writes five table cells for one group of counts: the raw total, then
  # the count and percentage of accepted items, then the count and
  # percentage of discarded items.  Counts are wrapped in "\ct{...}" and
  # percentages in "\pc{...}".  When {ctraw} is zero the divisor is
  # replaced by 1 so that the percentages never divide by zero.

  xctraw = ("\\ct{" sprintf("%d", ctraw) "}");
  xctgud = ("\\ct{" sprintf("%d", ctgud) "}");
  xfrgud = ("\\pc{" sprintf("%.1f", 100*ctgud/(ctraw == 0 ? 1 : ctraw)) "}");
  xctbad = ("\\ct{" sprintf("%d", ctbad) "}");
  xfrbad = ("\\pc{" sprintf("%.1f", 100*ctbad/(ctraw == 0 ? 1 : ctraw)) "}");
  printf " & %10s", xctraw;
  printf " & %10s", xctgud;
  printf " & %10s", xfrgud;
  printf " & %10s", xctbad;
  printf " & %10s", xfrbad;
}

function arg_error(msg)
{
  # Reports a command-line error {msg} and the usage text on stderr,
  # then requests exit status 1.  (The message used to be dropped: the
  # format had a single "%s" but was given NR ahead of {msg}.)
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg)
{
  # Reports an input-data error {msg} on stderr, prefixed with the
  # current line number of the input file, then requests exit status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}