#! /usr/bin/gawk -f
# Last edited on 2000-12-27 13:19:08 by stolfi

# compute-elem-counts: appends to each input record the number of
# elements ("factors") of its factored word.
#
# Input records must be
#
#   COUNT WORD FWORD
#
# where FWORD is WORD factored into elements by "{}". Empty lines and
# comments are ignored. Outputs the same lines, with an extra
# field -- the number of factors:
#
#   COUNT WORD FWORD LEN
#
# Exit status: 0 on success, 1 after a data or argument error
# (the error message is written to /dev/stderr).

BEGIN {
  # abort < 0 means "no error so far"; the error functions set it to
  # the desired exit status before calling exit.
  abort = -1;
  usage = ( "compute-elem-counts < WORDFILE > FREQFILE " );
}

# Once an error has been flagged, stop processing any further records.
(abort >= 0) { exit abort; }

# Skip blank lines and comment lines. Leading blanks AND tabs are
# tolerated, so that a whitespace-only line is never mistaken for a
# malformed data record.
/^[ \t]*([#]|$)/ { next; }

# Data record: COUNT WORD FWORD.
/./ {
  if (NF != 3) { data_error("wrong number of fields"); }
  ct = $1; w = $2; fw = $3;

  # Put a blank between adjacent elements ("}{" -> "} {") and let
  # split() count the resulting blank-separated pieces. The braces are
  # written as bracket expressions so they can never be parsed as a
  # regexp interval expression.
  y = fw;
  gsub(/[}][{]/, "} {", y);
  nels = split(y, wels);

  print ct, w, fw, nels;
  next;
}

# arg_error(msg): reports a command-line error MSG followed by the usage
# string on /dev/stderr, then terminates with exit status 1.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}

# data_error(msg): reports MSG on /dev/stderr, prefixed with the current
# input file name and line number, then terminates with exit status 1.
function data_error(msg,    fname)
{
  # NOTE(review): FILENAME may be empty when the input comes from
  # standard input (the documented usage); show "-" in that case so the
  # message stays readable. Confirm against the awk in use.
  fname = (FILENAME == "" ? "-" : FILENAME);
  printf "file %s, line %d: %s\n", fname, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}