#! /usr/bin/gawk -f
# Last edited on 2002-01-03 22:40:49 by stolfi

# Tabulates given counts and/or frequencies of symbols
# and formats the output as a LaTeX table.
#
# Assumes the input records have fields
#
#   COUNT FREQ GLYPH{REPEAT}
#
# where GLYPH is an EVA string, already capitalized,
# REPEAT is an integer, and COUNT and FREQ are the
# statistics of maximal GLYPH^REPEAT in some sample.
# Blank lines and lines whose first non-blank character is "#"
# are ignored.
#
# The output table has one row per listed glyph and, for each
# repeat count from 1 to "maxRepeat", a count column (if "showCounts")
# and/or a frequency column (if "showFreqs").  Only glyphs that
# occur in the input with some repeat count greater than 1 get a row.
#
# The output entries correspond to the glyphs listed in the
# "elemList" string or in the "elemTable" file.  In these lists,
# a "-" glyph inserts a horizontal line.
# This special glyph may occur multiple times.
#
# NOTE(review): parse_explicit_elems() and load_elems_from_file() are
# called below but are not defined in this part of the file; presumably
# they are supplied elsewhere (e.g. by another "-f" library) -- confirm.

BEGIN {
  abort = -1;
  usage = ( ARGV[0] "\\\n" \
    " { -v elemList='a,o,...' | -v elemTable=FILE } \\\n" \
    " [ -v maxRepeat=NUM ] \\\n" \
    " [ -v minFreq=FRQ ] \\\n" \
    " [ -v freqDigits=NUM ] \\\n" \
    " [ -v showCounts=BOOL ] \\\n" \
    " [ -v showFreqs=BOOL ] \\\n" \
    " < INFILE.frq > OUTFILE.tex" \
  );

  # Defaults for the optional parameters:
  if (maxRepeat == "") { maxRepeat = 4; }
  if (showCounts == "") { showCounts = 1; }
  if (showFreqs == "") { showFreqs = 1; }
  if (minFreq == "") { minFreq = 0.00005; }
  if (freqDigits == "") { freqDigits = 4; }

  if ((elemList == "") == (elemTable == "")) {
    arg_error("must define exactly one of \"elemList\" and \"elemTable\"");
  }

  # With both kinds of column disabled the tabular preamble would have
  # no data columns while the rows still emit "&" separators, which is
  # not valid LaTeX; reject that combination up front.
  if ((! showCounts) && (! showFreqs)) {
    arg_error("at least one of \"showCounts\" and \"showFreqs\" must be true");
  }

  # Element list: {elem[1..nelems]} are the glyphs, in output order.
  split("", elem);
  split("", eindex);
  split("", eclass);
  if (elemList != "") {
    nelems = parse_explicit_elems(elemList,elem,eindex,eclass);
  } else {
    nelems = load_elems_from_file(elemTable,elem,eindex,eclass);
  }

  # Indexed with the capitalized element and the repeat count:
  split("", ect);
  split("", efr);
  # Indexed with the capitalized element; largest repeat count seen:
  split("", maxrep);
}

# Stop reading as soon as an error has been flagged.
(abort >= 0) { exit abort; }

# Skip blank lines and comment lines.
/^ *([#]|$)/ { next; }

# Data line: "COUNT FREQ GLYPH{REPEAT}".
/./ {
  if (NF != 3) { data_error("bad line format"); }
  ct = $1; fr = $2; er = $3;
  nread++;

  # Split "GLYPH{REPEAT}" into the glyph {e} and the repeat count {rep}.
  if (! match(er, /^[A-Za-z?]+[{]/)) {
    data_error(("bad elem/repeat format \"" er "\""));
  }
  e = substr(er, 1, RLENGTH-1);
  rep = substr(er, RLENGTH);
  if (rep !~ /^[{][0-9]+[}]$/) {
    data_error(("bad repeat format \"" rep "\""));
  }
  gsub(/[{}]/, "", rep);
  rep = rep + 0;
  if ((rep < 1) || (rep > maxRepeat)) {
    data_error(("bad repeat value \"" rep "\""));
  }
  if ((e,rep) in ect) {
    data_error(("repeated elem/repeat \"" er "\""));
  }

  # Remember the largest repeat count seen for this glyph.
  if (! (e in maxrep)) { maxrep[e] = 0; }
  if (rep > maxrep[e]) { maxrep[e] = rep; }

  if (ct !~ /^[0-9]+$/) {
    data_error(("bad count format \"" ct "\""));
  }
  if (fr !~ /^[0-9]*[.][0-9]*$/) {
    data_error(("bad freq format \"" fr "\""));
  }
  ect[e,rep] = ct;
  efr[e,rep] = fr;
  next;
}

END {
  if (abort >= 0) { exit abort; }
  print_elem_freqs_table();
}

function print_elem_freqs_table(   i,e) {
  # Writes the whole LaTeX table to standard output, and a summary of
  # the glyphs that got a row to standard error.
  printf "%% Created by tex-format-elem-rep-freqs\n";
  output_table_preamble();
  output_table_header();
  printf "nelems = %d:", nelems > "/dev/stderr";
  for (i = 1; (i <= nelems); i++) {
    e = elem[i];
    if (e == "-") {
      output_hline();
    } else if (maxrep[e] > 1) {
      # Only glyphs seen with some repeat count above 1 get a row.
      printf " %s", e > "/dev/stderr";
      output_elem_name(format_elem(e));
      output_elem_counts(e, ect, efr);
      printf " \\str\\\\\n";
    }
  }
  printf "\n" > "/dev/stderr";
  output_hline();
  printf "\\end{tabular}%%\n";
}

function output_table_preamble(   rep) {
  # Writes the "\begin{tabular}" line, with one centered glyph column
  # and one group of right-aligned columns per repeat count,
  # followed by a horizontal line.
  printf "\\begin{tabular}{";
  printf "|c|";
  for (rep = 1; rep <= maxRepeat; rep++) {
    if (showCounts) { printf "r"; }
    if (showFreqs) { printf "r"; }
    printf "|";
  }
  printf "}\n";
  output_hline();
}

function output_table_header(   nc,rep) {
  # Prints column headers: "glyph", then each repeat count spanning
  # the {nc} columns of its group; followed by a horizontal line.
  nc = (showCounts ? 1 : 0) + (showFreqs ? 1 : 0);
  printf " \\hd{glyph}\n";
  for (rep = 1; rep <= maxRepeat; rep++) {
    printf " & \\multicolumn{%d}{r|}{\\hd{%d}} \n", nc, rep;
  }
  printf " \\\\\n";
  output_hline();
}

function output_hline() {
  # Writes a horizontal table rule.
  printf " \\hline\n";
}

function output_elem_name(xe) {
  # Writes the first cell of a row; {xe} is the formatted glyph.
  printf " ";
  printf "%-10s ", xe;
}

function output_elem_counts(e,ect,efr,   rep,ct,fr,xct,xfr) {
  # Writes the count/frequency cells of glyph {e} for every repeat
  # count in 1..maxRepeat.  Missing entries read as empty strings,
  # which the formatters turn into the "zero" macros.
  for (rep = 1; rep <= maxRepeat; rep++) {
    ct = ect[e,rep]; xct = format_count(ct);
    fr = efr[e,rep]; xfr = format_freq(fr);
    output_entry_count(xct, xfr);
  }
}

function output_entry_count(xct,xfr) {
  # Writes one group of cells: the formatted count {xct} and/or the
  # formatted frequency {xfr}, as selected by showCounts/showFreqs.
  if (showCounts) { printf "& %10s ", xct; }
  if (showFreqs) { printf "& %10s ", xfr; }
}

function format_elem(e) {
  # Returns the LaTeX markup for glyph {e}.
  # NOTE(review): the "+" case looks unreachable from the code visible
  # here, since rows are only printed for glyphs with maxrep[e] > 1 and
  # the input pattern accepts only [A-Za-z?] glyphs -- confirm.
  if (e == "+") {
    return ("\\tot");
  } else {
    return ("\\ev{" e "}");
  }
}

function format_count(ct) {
  # Returns the LaTeX markup for count {ct}; zero or missing counts
  # become "\zeroct".
  if (ct + 0 == 0) {
    return "\\zeroct";
  } else {
    return ("\\ct{" sprintf("%d", ct) "}");
  }
}

function format_freq(fr,   xfr) {
  # Returns the LaTeX markup for frequency {fr}; values below
  # {minFreq} (including missing ones) become "\zerofr".
  # The printed text always has {freqDigits+1} characters: ".dddd" for
  # values that round below 1, and "d.ddd" (last digit dropped)
  # otherwise.
  if (fr + 0 < minFreq) {
    return "\\zerofr";
  } else {
    xfr = sprintf("%*.*f", freqDigits+2, freqDigits, fr);
    # Test the rounded text, so that e.g. 0.99996 is shown as "1.000".
    # The "+ 0" forces a numeric comparison; sprintf() returns a string,
    # which awk would otherwise compare to the constant as text.
    if (xfr + 0 >= 1.0) {
      xfr = substr(xfr, 1, freqDigits+1);
    } else {
      xfr = substr(xfr, 2, freqDigits+1);
    }
    return ("\\fr{" xfr "}");
  }
}

function arg_error(msg) {
  # Reports a command line error, prints the usage, and aborts.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg) {
  # Reports an error in the current input line and aborts.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}