#! /usr/bin/gawk -f
# Last edited on 2025-05-04 21:44:42 by stolfi

# Reads from stdin a file with token counts and lexeme counts per section
# of subset {BOOK} of language {LANG}.  Outputs the main numbers from that
# file as a set of '\newcommand{MACRO}{VALUE}' lines for inclusion in a
# TeX document.
#
# For the input format, see Note-100.txt, file "gen/{LANG}/{BOOK}/???".
# As parsed here, every data line must have exactly 11 fields:
#
#   $1       section name
#   $2..$6   tokens:  raw count, good count, good %, bad count, bad %
#   $7..$11  lexemes: raw count, good count, good %, bad count, bad %
#
# Blank lines and lines whose first non-blank character is '#' or '/'
# are skipped.
#
# Assumes that the line with section {TOTSEC} refers to the whole book.
#
# The macros have names "\{LANG}{BOOK}{OSEC}{xxx}{yyy}{zzz}" where
# {xxx} is "Raw", "Gud", or "Bad", {yyy} is "Tks" (tokens) or "Wds"
# (lexemes), and {zzz} is either empty for counts, or "Pct" for
# percentages.  {OSEC} is empty for the {TOTSEC} line; otherwise it is
# the first three characters of the section name followed by a letter
# derived from its fifth character (see {dig_to_let} below).

BEGIN {
  # {abort} stays negative until an error is reported; afterwards it
  # holds the exit status to use.
  abort = -1;
  usage = ( "tex_format_raw_gud_bad_summary.gawk \\\n" \
    " -v lang={LANG} -v book={BOOK} [ -v totSection={TOTSEC} ] \\\n" \
    " < INFILE.wct > OUTFILE.tex" \
  );

  if (lang == "") { arg_error("must define \"lang\""); }
  if (book == "") { arg_error("must define \"book\""); }
  if (totSection == "") { totSection = "tot.1" }

  # Macro-name infix for each of the two groups of columns:
  split("", tw); tw[0] = "Tks"; tw[1] = "Wds";

  # Table indexed 1..10 with values 'B'..'K'.  It is looked up with a
  # single digit, so digits 1..9 become 'B'..'J', digit 0 (or a
  # non-digit character) yields the empty string, and 'K' is never used.
  split("B C D E F G H I J K", dig_to_let);

  # "%%" is a literal percent sign (a TeX comment); a bare "%" in a
  # printf format has undefined behavior.
  printf "%% Created by tex_format_raw_gud_bad_summary.gawk\n";
  found_tot = 0;
}

# Safety net: stop at once if an error was already reported.
(abort >= 0) { exit abort; }

# Skip blank lines and comment lines.
/^ *([#/]|$)/ { next; }

# Data line: emit the ten macros for this section.
/./ {
  if (NF != 11) { data_error("bad input format"); }
  sec = $1;

  # Define the section part of the macro name:
  if ($1 == totSection) {
    osec = ""; found_tot = 1;
  } else {
    osec = ( substr(sec,1,3) dig_to_let[substr(sec,5,1)+0] );
  }

  # k = 0 handles the token columns, k = 1 the lexeme columns.
  for (k = 0; k < 2; k++) {
    raw_num = $(5*k + 2);
    gud_num = $(5*k + 3);
    gud_pct = $(5*k + 4);
    bad_num = $(5*k + 5);
    bad_pct = $(5*k + 6);

    # Empty TeX comment line as a visual separator between groups.
    printf "%%\n";
    pdef_num(osec, ("Raw" tw[k]), raw_num);
    pdef_num(osec, ("Gud" tw[k]), gud_num);
    pdef_pct(osec, ("Gud" tw[k] "Pct"), gud_pct);
    pdef_num(osec, ("Bad" tw[k]), bad_num);
    pdef_pct(osec, ("Bad" tw[k] "Pct"), bad_pct);
  }
}

END {
  # An "exit" executed in BEGIN or in a main rule still runs END.  If an
  # error was already reported, just propagate its status instead of
  # adding a misleading "could not find section" message.
  if (abort >= 0) { exit abort; }
  if (! found_tot) {
    data_error(("could not find section \"" totSection "\""));
  }
}

function pdef_num(osec, var, val) {
  # Writes the macro definition for a count; {val} is copied verbatim.
  pdef(osec, var, val);
}

function pdef_pct(osec, var, val) {
  # Writes the macro definition for a percentage, formatted with one
  # decimal digit.
  pdef(osec, var, sprintf("%.1f", val));
}

function pdef(osec, var, val) {
  # Writes "\newcommand{\{lang}{book}{osec}{var}}{{val}}" to stdout,
  # using the global variables {lang} and {book}.
  printf "\\newcommand{\\%s%s%s%s}{%s}\n", lang, book, osec, var, val;
}

function arg_error(msg) {
  # Reports a command-line error plus the usage text on stderr, then
  # terminates with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg) {
  # Reports an input error on stderr, tagged with the current input line
  # number, then terminates with status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}