#! /usr/bin/gawk -f
# Last edited on 2002-01-03 22:24:39 by stolfi

BEGIN {
  abort = -1;
  usage = ( ARGV[0] "\\\n" \
    " { -v elemList='a,o,...' | -v elemTable=FILE } \\\n" \
    " [ -v ncols=NUM ] \\\n" \
    " [ -v minFreq=FRQ ] \\\n" \
    " [ -v freqDigits=NUM ] \\\n" \
    " [ -v showHeader=BOOL ] \\\n" \
    " [ -v showClasses=BOOL ] \\\n" \
    " [ -v showCounts=BOOL ] \\\n" \
    " [ -v showFreqs=BOOL ] \\\n" \
    " < INFILE.frq > OUTFILE.tex" \
  );

  # Tabulates given counts and/or frequencies of symbols
  # and formats the output as a LaTeX table.
  # Assumes the input records have fields
  #
  #   COUNT FREQ GLYPH
  #
  # where GLYPH is an EVA string, already capitalized
  # The output is formatted as `ncols' columns, filled row-wise.
  #
  # The output entries correspond to the glyphs listed in the
  # "elemList" string or in the "elemTable" file.  In these lists,
  #   if GLYPH = "~", the entry is left blank.
  #   if GLYPH = "/", the current row is padded with blanks.
  #   if GLYPH = "-", does the same, then inserts a horizontal line.
  # These special glyphs may occur multiple times.
  #
  # If showClasses is TRUE, also prints the element's class
  # at the leftmost column.  This option is effective only
  # when the elements are read from a file.

  # Defaults for options that were not given with "-v":
  if (ncols == "") { ncols = 2; }
  if (showHeader == "") { showHeader = 0; }
  if (showCounts == "") { showCounts = 1; }
  if (showFreqs == "") { showFreqs = 1; }
  if (showClasses == "") { showClasses = ( elemTable != ""); }
  if (minFreq == "") { minFreq = 0.00005; }
  if (freqDigits == "") { freqDigits = 4; }

  # A table needs at least one entry per row; with fewer, the tabular
  # preamble would have no data columns at all.
  if (ncols + 0 < 1) { arg_error("\"ncols\" must be at least 1"); }

  if ((elemList == "") == (elemTable == "")) {
    arg_error("must define exactly one of \"elemList\" and \"elemTable\"");
  }

  split("", elem);
  split("", eindex);
  split("", eclass);

  # NOTE(review): parse_explicit_elems, load_elems_from_file and the
  # global "hasclass" are not defined in this file; presumably they are
  # supplied by another source file -- confirm.
  if (elemList != "") {
    nelems = parse_explicit_elems(elemList,elem,eindex,eclass);
  } else {
    nelems = load_elems_from_file(elemTable,elem,eindex,eclass);
  }

  if (showClasses && (! hasclass)) {
    arg_error("there are no classes to show");
  }

  # indexed with the capitalized element itself:
  split("", ect);
  split("", efr);
}

# Stop processing records once an error has been flagged.
(abort >= 0) { exit abort; }

# Skip blank lines and "#" comment lines.
/^ *([#]|$)/ { next; }

# Data record: COUNT FREQ GLYPH.
/./ {
  if (NF != 3) { data_error("bad line format"); }
  ct = $1; fr = $2; e = $3;
  if (e !~ /^[A-Za-z?]+$/) { data_error(("bad elem \"" e "\"")); }
  if (e in ect) { data_error(("repeated elem \"" e "\"")); }
  ect[e] = ct;
  efr[e] = fr;
  next;
}

END {
  if (abort >= 0) { exit abort; }
  print_elem_freqs_table();
}

function tabular_cols_per_entry() {
  # Number of LaTeX tabular columns used by one table entry:
  # the glyph itself, plus the optional count and frequency columns.
  # Must agree with what output_table_preamble emits per entry.
  return 1 + (showCounts ? 1 : 0) + (showFreqs ? 1 : 0);
}

function print_elem_freqs_table( \
  i,col,row,hline,cline,e,ct,fr,cl,oldcl,xe,xct,xfr,xcl \
) {
  # Writes the whole LaTeX table to standard output, using the global
  # element list (elem, eclass, nelems) and the data read from the
  # input (ect, efr).  Also writes a trace of the elements to stderr.

  printf "%% Created by %s\n", ARGV[0];

  # Table preamble
  output_table_preamble();

  # Table header:
  if (showHeader) { output_table_header(); }

  # Table entries:
  oldcl = "";
  row = 0;
  col = ncols+1;   # forces a new row before the first entry
  hline = 1;       # the first row is preceded by a full rule
  cline = 0;
  printf "nelems = %d:", nelems > "/dev/stderr";

  # After the list is exhausted, keeps going with blank ("~") entries
  # until the last row is complete.
  for (i = 1; ((i <= nelems) || (col <= ncols)); i++) {
    # Assert: col > 1.
    e = (i <= nelems ? elem[i] : "~");
    printf " %s", e > "/dev/stderr";

    # Obtain element data:
    if (e ~ /^[-\/]$/) {
      # Row break: pad the current row with blank entries.
      while (col <= ncols) { output_entry(col, "", "", ""); col++; }
      # "-" also requests a partial rule before the next row
      # (cline counts table entries, not tabular columns).
      if (e == "-") { cline = ncols; }
    } else {
      if (e == "~") {
        # Blank entry; stays in the current class.
        cl = oldcl; ct = 0; fr = 0;
        xe = ""; xcl = ""; xct = ""; xfr = "";
      } else {
        cl = eclass[i];
        ct = ect[e];
        fr = efr[e];
        # Format values
        if (showClasses && (cl != oldcl)) {
          # A new class starts on a fresh row, after a full rule.
          xcl = ("\\cl{" cl "}");
          while (col <= ncols) { output_entry(col, "", "", ""); col++; }
          hline = 1; cline = 0;
        } else {
          xcl = "";
        }
        xe = format_elem(e);
        xct = format_count(ct);
        xfr = format_freq(fr);
      }

      # Print element entry
      if (col > ncols) {
        end_row(row, hline, cline);
        row++; col = 1; hline = 0; cline = 0;
      }
      # Assert: col <= ncols
      output_entry(col, xe, xct, xfr, xcl);
      col++;
      oldcl = cl;
    }
  }
  printf "\n" > "/dev/stderr";
  end_row(row, 1, 0);
  printf "\\end{tabular}%%\n";
}

function output_table_preamble(   col) {
  # Prints the "\begin{tabular}" line: an optional class column,
  # then for each entry a "c" column plus an "r" column for each of
  # the count and frequency, if enabled.
  printf "\\begin{tabular}{|";
  if (showClasses) { printf "c|"; }
  for (col = 0; col < ncols; col++) {
    printf "c";
    if (showCounts) { printf "r"; }
    if (showFreqs) { printf "r"; }
    printf "|";
  }
  printf "}\n";
}

function output_table_header(   col) {
  # Prints column headers
  printf " \\hline\n";
  if (showClasses) { printf " \\hd{class} &"; }
  # The "glyphs" heading must span every tabular column of every entry,
  # not just one tabular column per entry.
  printf " \\multicolumn{%d}{l|}{\\hd{glyphs}} \\\\\n", ncols * tabular_cols_per_entry();
}

function format_elem(e) {
  # Returns the LaTeX markup for element e; "+" is shown as "\tot".
  # NOTE(review): the input-record rule only accepts glyphs matching
  # [A-Za-z?]+, so a "+" element can never get a count or frequency
  # from the input -- confirm whether that is intended.
  if (e == "+") {
    return ("\\tot");
  } else {
    return ("\\ev{" e "}");
  }
}

function format_count(ct) {
  # Returns the LaTeX markup for count ct; zero (or missing) counts
  # are shown as "\zeroct".
  if (ct + 0 == 0) {
    return "\\zeroct";
  } else {
    return ("\\ct{" sprintf("%d", ct) "}");
  }
}

function format_freq(fr,   txt) {
  # Returns the LaTeX markup for frequency fr; values below minFreq
  # are shown as "\zerofr".  Otherwise the value is printed with
  # freqDigits decimals and trimmed to freqDigits+1 characters:
  # values below 1 lose the leading "0", others lose trailing digits.
  if (fr + 0 < minFreq) {
    return "\\zerofr";
  }
  txt = sprintf("%*.*f", freqDigits+2, freqDigits, fr);
  # sprintf yields a string, so force a numeric comparison here;
  # comparing the string itself against 1.0 would compare as text.
  if (txt + 0 >= 1.0) {
    txt = substr(txt, 1, freqDigits+1);
  } else {
    txt = substr(txt, 2, freqDigits+1);
  }
  return ("\\fr{" txt "}");
}

function end_row(row,hline,cline,   fcol,lcol) {
  # Terminates table row number `row' (nothing to terminate if row is 0),
  # then prints a full rule if hline is true, else a partial rule
  # covering the first `cline' entries (excluding the class column)
  # if cline is positive.
  if (row > 0) { printf "\\str\\\\\n"; }
  if (hline) {
    printf " \\hline\n";
  } else if (cline > 0) {
    fcol = 1 + (showClasses ? 1 : 0 );
    # cline counts table entries; each entry takes several tabular columns.
    lcol = fcol + cline * tabular_cols_per_entry() - 1;
    printf " \\cline{%d-%d}\n", fcol, lcol;
  }
}

function output_entry(col,xe,xct,xfr,xcl) {
  # Prints one table entry at entry position col (1-based): the glyph
  # xe, then the count xct and frequency xfr if enabled.  In column 1,
  # the class cell xcl is printed first when classes are shown.
  if (col != 1) { printf "&\n"; }
  printf " ";
  if ((col == 1) && showClasses) { printf "%10s &\n ", xcl; }
  printf "%-10s ", xe;
  if (showCounts) { printf "& %10s ", xct; }
  if (showFreqs) { printf "& %10s ", xfr; }
}

function arg_error(msg) {
  # Reports a command line error plus the usage text on stderr, and exits.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg) {
  # Reports an input data error, with the line number, on stderr, and exits.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}