#! /usr/bin/gawk -f
# Last edited on 2000-05-26 02:34:49 by stolfi

# Usage: "$0 < joinfile > output.cmp"
#
# This script is used internally by compare-freqs.  Each data line of the
# input should have the format "CT1 FR1 CT2 FR2 ... CTN FRN WORD", where
# the "CT"s are counts and the "FR"s are frequencies between 0 and 1.
# The WORD should not contain any spaces.
#
# Computes the maximum frequency of each word.  The output has the
# format " FRMAX CT1 FR1 CT2 FR2 ... CTN FRN WORD", where "FRMAX" is
# the maximum of "FR1" through "FRN".
#
# A comment in the input file beginning with "##" followed by a list
# of column names is turned into a pair of column header comments.
# Other "#"-comments are ignored, and so are empty or all-blank lines.

# Column-header comment: emit a title line and a matching dashes line.
/^[#][#]/ {
  dashes = "--------------------------";

  # Title line: "MAXFR", then the given column names ($1 is the "##"
  # marker itself, so names start at field 2), then "WORD".
  printf "##%5.5s", "MAXFR";
  for (i = 2; i <= NF; i++) { printf " %-11.11s", $(i); }
  printf " %-11.11s", "WORD";
  printf "\n";

  # Underline: one run of dashes per title; the ".5"/".11" precisions
  # truncate the long dash string to each column's width.
  printf "# %5.5s", dashes;
  for (i = 2; i <= NF; i++) { printf " %-11.11s", dashes; }
  printf " %11.11s", dashes;
  printf "\n";
  next;
}

# Any other "#"-comment is dropped.
/^[#]/ { next; }

# Data line.  Testing NF (rather than /./) also skips lines that contain
# only blanks, which have no fields and used to yield a bogus record.
NF > 0 {
  # Frequencies sit in the even-numbered fields; the last field is WORD.
  MAXFR = 0;
  for (i = 2; i <= NF-1; i += 2) {
    # "+ 0" forces a numeric comparison even if the field is malformed.
    fr = $(i) + 0;
    if (fr > MAXFR) { MAXFR = fr; }
  }
  printf " %5.3f", MAXFR;
  # Echo the (count, frequency) pairs, reformatted to fixed widths.
  for (i = 1; i <= NF-1; i += 2) { printf " %5d %5.3f", $(i), $(i+1); }
  printf " %s\n", $(NF);
  next;
}