#! /bin/csh -f
# Usage: <this script> PATTERN FILE...
#
# Each FILE must contain words, one per line.
# For every FILE the script:
#   1. drops lines made up only of "-" and "/" characters (or empty lines),
#   2. rewrites each word with the sed substitutions listed below,
#   3. feeds the words to the external command enum-word-pairs
#      (NOTE(review): per the original header it enumerates pairs of
#      consecutive words; its exact output format is not visible here),
#   4. keeps only the output lines that match the extended regex PATTERN,
#   5. counts identical lines and keeps the 10 most frequent.
# The per-file tables are then printed side by side under a header line.
#
# Temporary files named .<basename-without-extension>.frq are created in the
# current directory and removed on exit, including on interrupt.

set usage = "$0 PATTERN FILE..."

# Check the argument count before shifting: shift on an empty argument
# list aborts the script with an error, so the usage line would never be shown.
if ( $#argv < 2 ) then
  echo "usage: $usage"; exit 1
endif

set pat = "$1"; shift

set files = ( $* )
set outs = ( )
set head = ""
set dash = ""
set wd = 0
set rc = 0

# From here on temporary files may exist, so clean them up on interrupt.
onintr interrupted

foreach f ( ${files} )
  echo $f

  # Strip any directory part first so the temporary file always lands in the
  # current directory (done in two steps because plain csh allows only one
  # modifier per expansion).
  set base = ${f:t}
  set g = .${base:r}.frq

  # Register the temporary file before writing it, so that an interrupt in
  # the middle of the pipeline still removes it.
  set outs = ( $outs ${g} )

  # sed rules, in order: every t becomes k; a final a or o becomes y;
  # a y at the start of the word (after any leading q characters) becomes o.
  # The last sort orders by the count produced by uniq -c, numerically and
  # descending; -k1,1nr is the current spelling of the obsolete +0 -1nr key.
  cat $f \
    | grep -E -v '^[-/]*$' \
    | sed \
        -e 's/t/k/g' \
        -e 's/[ao]$/y/' \
        -e 's/^\([q]*\)y/\1o/' \
    | enum-word-pairs \
    | grep -E -e "${pat}" \
    | sort | uniq -c | expand \
    | sort -k1,1nr \
    | head -10 \
    > ${g}

  # Each file contributes one 30-character column to the merged listing.
  @ wd = ${wd} + 30
  set head = "${head}`printf ' count %-22s' ${f}`"
  set dash = "${dash} ----- ----------------------"
end

echo "${head}"
echo "${dash}"

# Merge the per-file tables side by side (-m), without page headers (-t),
# padding with blanks rather than tabs, in a page ${wd} columns wide.
pr -m -t -i' '1 -w ${wd} $outs
goto cleanup

interrupted:
set rc = 1

cleanup:
/bin/rm -f $outs
exit $rc