#! /usr/bin/gawk -f
# Last edited on 2000-12-27 11:25:49 by stolfi

# Usage:
#
#   extract-words-by-elem-count -v len=LEN < WORDFILE > FREQFILE
#
# Input records must be
#
#   COUNT WORD
#
# where WORD is factored into elements by "{}", e.g. "{a}{bc}{d}" has
# three elements.  Empty lines and comments are ignored.  Outputs those
# lines where WORD has exactly LEN elements, in the same format
#
#   COUNT WORD
#

BEGIN {
  # "abort" stays negative until arg_error() records a failure status.
  abort = -1;
  usage = ( "extract-words-by-elem-count -v len=LEN < WORDFILE > FREQFILE " );

  if (len == "") {
    arg_error("must specify \"len\"");
  } else if (len + 0 != len) {
    # A value that does not look like a number is compared as a string
    # and can never equal an element count, so the output would be
    # silently empty.  Report it instead.
    arg_error("\"len\" must be a number");
  }
}

# Stop before processing any record if an argument error was recorded.
(abort >= 0) { exit abort; }

# Skip blank lines and "#" comments; leading blanks or tabs are allowed.
/^[ \t]*([#]|$)/ { next; }

# Data record: print it unchanged when WORD (field 2) has LEN elements.
/./ {
  if (count_elems($2) == len) { print; }
  next;
}

# Returns the number of elements of "word".
#
# Adjacent elements meet at "}{"; a space is inserted at each such
# boundary and the result is split on whitespace.  A non-empty word
# without any "}{" counts as one element; an empty word counts as zero.
# "spaced" and "elems" are local scratch variables, not parameters.
function count_elems(word,    spaced, elems)
{
  spaced = word;
  # Bracket expressions keep the braces literal in every awk; a bare
  # /}{/ depends on how the regex engine treats an invalid interval.
  gsub(/[}][{]/, "} {", spaced);
  return split(spaced, elems);
}

# Prints "msg" and the usage line to standard error, records exit
# status 1 in "abort", and terminates the program with that status.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}