#! /usr/bin/gawk -f
# Last edited on 2001-01-05 23:29:30 by stolfi

# Reads a file whose entries have the form
#   COUNT ITEM1:ITEM2
# as produced by "uniq -c", and writes a similar file with lines
#   COUNT FREQROW FREQCOL FREQTOT ITEM1:ITEM2
# where FREQROW is the fraction of COUNT relative to the total of
# all COUNTs with the same ITEM1, FREQCOL is the same relative to
# ITEM2, and FREQTOT is the frequency relative to all items.
#
# Blank lines and lines starting with "#" are copied through unchanged,
# in their original position.
#
# If outputTotals is 1, also outputs extra pairs of the form ITEM1:+,
# +:ITEM2, +:+ with the row, column, and table totals, respectively.
#
# NOTE(review): the output section of the END rule and the header of
# fatal_error were lost from the copy of this script under review and
# have been reconstructed from the description above.  The printf field
# widths and the placement/order of the totals lines are assumptions;
# confirm them against a pristine copy or against consumers of the
# .pfr files.

BEGIN {
  abort = -1;
  usage = ( "compute-row-col-freqs \\\n" \
    " [ -v outputTotals=BOOL ] \\\n" \
    " < INFILE.pct > OUTFILE.pfr" \
  );

  if (outputTotals == "") { outputTotals = 0; }

  # Per-row, per-column and whole-table sums of COUNT.
  split("", totrow);
  split("", totcol);
  tottbl = 0;

  # Row and column keys in order of first appearance, so that the
  # optional totals lines come out in a deterministic order.
  split("", rowkey); nrows = 0;
  split("", colkey); ncols = 0;

  # Input lines are buffered, because the frequencies can only be
  # computed once all totals are known.  count[k] == "#" marks a
  # comment/blank line whose full text is kept in pair[k].
  split("", count);
  split("", pair);
  n = 0;
}

# Safeguard: stop processing records once an error has been flagged.
(abort >= 0) { exit abort; }

# Comment and blank lines: remember them verbatim.
/^[ ]*([#]|$)/ {
  count[n] = "#"; pair[n] = $0; n++;
  next;
}

# Data lines: COUNT ITEM1:ITEM2
{
  # NR (the record number) identifies the offending line; NF would
  # only report how many fields it has.
  if (NF != 2) { fatal_error(("line " NR ": bad input format = «" $0 "»")); }
  ct = $1; pr = $2;
  nitems = split(pr, item, ":");
  if (nitems != 2) { fatal_error(("line " NR ": bad pair format = «" pr "»")); }

  count[n] = ct; pair[n] = pr; n++;

  if (! (item[1] in totrow)) { rowkey[nrows] = item[1]; nrows++; }
  if (! (item[2] in totcol)) { colkey[ncols] = item[2]; ncols++; }

  totrow[item[1]] += ct;
  totcol[item[2]] += ct;
  tottbl += ct;
  next;
}

END {
  # "exit" inside a rule or function still runs END; bail out here so
  # that no partial output is produced after an error.
  if (abort >= 0) { exit abort; }

  dentbl = nonzero(tottbl);

  for (i = 0; i < n; i++) {
    if (count[i] == "#") { print pair[i]; continue; }
    ct = count[i]; pr = pair[i];
    split(pr, item, ":");
    print_entry(ct, ct/nonzero(totrow[item[1]]), ct/nonzero(totcol[item[2]]), ct/dentbl, pr);
  }

  if (outputTotals) {
    # Row totals: each one is the whole of its own row, and a part of
    # the "+" column, whose total is the table total.
    for (i = 0; i < nrows; i++) {
      tr = totrow[rowkey[i]];
      print_entry(tr, tr/nonzero(tr), tr/dentbl, tr/dentbl, (rowkey[i] ":+"));
    }
    # Column totals, symmetrically.
    for (i = 0; i < ncols; i++) {
      tc = totcol[colkey[i]];
      print_entry(tc, tc/dentbl, tc/nonzero(tc), tc/dentbl, ("+:" colkey[i]));
    }
    # Table total.
    print_entry(tottbl, tottbl/dentbl, tottbl/dentbl, tottbl/dentbl, "+:+");
  }
}

function nonzero(x)
{
  # Returns {x}, or 1 if {x} is zero, for use as a safe denominator.
  return (x != 0 ? x : 1);
}

function print_entry(ct, frow, fcol, ftot, pr)
{
  # Writes one output line: COUNT FREQROW FREQCOL FREQTOT PAIR.
  printf "%7d %7.5f %7.5f %7.5f %s\n", ct, frow, fcol, ftot, pr;
}

function fatal_error(msg)
{
  # Reports {msg} on stderr and terminates with exit status 1.
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit abort;
}