#! /usr/bin/gawk -f

# Reads from stdin a bunch of pairs of the form COUNT WORD,
# one per line (such as produced by "uniq -c"), where the
# same WORD may occur several times.  Adds all COUNTs for
# the same WORD, and writes the resulting TOTCOUNT WORD pairs
# to standard output, in some (unspecified) order.
#
# Usage: combine-freqs < INFILE > OUTFILE
#
# Empty lines are ignored.  Any non-empty line that does not have
# exactly two fields is reported on stderr and aborts the run with
# exit status 1; in that case no totals are written.

BEGIN {
  abort = 0;
  usage = "combine-freqs < INFILE > OUTFILE";

  # ARGC counts the program name itself, so ARGC == 1 means that no
  # command-line operands were given; input must come from stdin.
  if (ARGC != 1) {
    printf "usage: %s\n", usage > "/dev/stderr";
    abort = 1;
    exit 1;
  }
}

# Main rule: applied to every non-empty input line.
/./ {
  if (abort) { exit 1; }

  if (NF != 2) {
    # Must be printf (not print) so that %d and %s get expanded.
    printf "line %d, bad format \"%s\"\n", NR, $0 > "/dev/stderr";
    abort = 1;
    exit 1;
  }

  # Accumulate the count ($1) under its word ($2).
  ct[$2] += $1;
}

END {
  # An "exit" in a BEGIN or main rule still runs the END rule, so the
  # abort flag must be honored here to avoid emitting partial totals
  # after an error.
  if (abort) { exit 1; }

  # The iteration order of "for (w in ct)" is unspecified.
  for (w in ct) {
    printf "%7d %s\n", ct[w], w;
  }
}