#! /usr/bin/gawk -f
# Last edited on 2000-02-02 21:24:42 by stolfi

BEGIN {
  # abort < 0 means "no error so far"; error() sets it to the exit status.
  abort = -1;

  # Reads a file of word counts, as produced by "uniq -c",
  # divided into sections by blank lines and/or #-comments.
  #
  # Outputs a similar file, in the format
  #
  #   COUNT SCUMCT SFRAC SCUMFR FCUMCT FFRAC FCUMFR WORD...
  #
  # where COUNT WORD... is the original record, SCUMCT and FCUMCT are
  # the cumulative counts per section and per file, SFRAC and FFRAC
  # are the fractions of COUNT relative to the section and file
  # totals, and SCUMFR and FCUMFR are the corresponding cumulative
  # fractions.
  #
  # Preserves blank lines and comments.

  # Start with empty tables. For saved line i (0 <= i < nr):
  #   ct[i] is the count field, or "#" for a blank/comment line;
  #   w[i]  is the rest of the record, or the whole line for a
  #         blank/comment line.
  split("", w);
  split("", ct);
  nr = 0;
  OFS = " ";
}

# Slurp input file:

# Once error() has set abort, stop consuming input.
(abort >= 0) { exit abort; }

# Blank line or #-comment: saved verbatim and tagged with "#" in ct[].
/^ *([#]|$)/ {
  ct[nr] = "#";
  w[nr] = $0;
  nr++;
  next;
}

# Data line: must be exactly "COUNT WORD".
// {
  # Report the input line number (FNR); the field count NF that was
  # printed here before is not a line number.
  if (NF != 2) { error(("line " FNR ": bad format")); }
  ct[nr] = $1;
  # Blanking $1 makes awk rebuild $0 with OFS, so the saved text keeps
  # a leading separator in front of the word.
  $1 = "";
  w[nr] = $0;
  nr++;
  next;
}

# Print it out:

END {
  # Compute total totalorum:
  totF = 0;

  # FIXME(review): the remainder of this block is missing from the
  # source as received; the text between "for(i=0; i" and the final
  # redirection to "/dev/stderr" was lost. The loops that total the
  # counts per file and per section and print the output records
  # described in the BEGIN comment must be restored from the
  # original script. Until then nothing is printed.
  #
  # Surviving fragment:   for(i=0; i
}

# Reports MSG on standard error, records a failure status in the global
# "abort", and exits. Only the redirection and the last two statements
# survive in the source as received.
# NOTE(review): the function header and the print statement are
# reconstructed from the call site and the surviving tail; confirm
# against the original script.
function error(msg) {
  print msg > "/dev/stderr";
  abort = 1;
  exit abort;
}