#! /usr/bin/gawk -f
# Enumerates all N-grams in the input file.
# Usage: "$0 -v n=NUM < INFILE > OUTFILE"
#
# For every input line, prints each substring of exactly n consecutive
# characters, one per output line, in left-to-right order.
# Lines are NOT padded with blanks, so a line shorter than n characters
# (including an empty line) produces no output.
#
# n must be a whole number >= 1; otherwise "bad value of n" is written to
# /dev/stderr and the script exits with status 1.

# Validate n once, on the first record, instead of on every line.
NR == 1 {
    # (n + 0) forces a numeric comparison, so an unset or non-numeric n
    # counts as 0 and is rejected.  Without it a value such as "3abc"
    # would be compared as a string and slip through.
    # n != int(n) rejects fractional values such as 2.5, and also strings
    # with trailing garbage (those compare as strings and differ).
    if ((n + 0) < 1 || n != int(n)) {
        printf "bad value of n\n" > "/dev/stderr"
        exit 1
    }
    # Make n a plain number so all later comparisons are numeric.
    n = int(n)
}

# Emit every n-character window of the current line.
{
    m = length($0)
    # When m < n the upper bound is below 1 and the loop body never runs,
    # so short and empty lines are skipped without a separate test.
    for (i = 1; i <= m - n + 1; i++)
        print substr($0, i, n)
}