COMPUTING THE LENGTH DISTRIBUTION OF ROMAN NUMERALS

  Computing the length distributions.  For each numeral kind ("old",
  "new") the loop below reads ".roman-${ekind}.nums", prints the length
  of the first field minus one for every input line, counts how many
  times each value occurs, and writes the "count value" pairs, sorted
  numerically by value, to "dat/.roman-${ekind}-length.cts".

  The sort key is spelled "-k2,2n".  The obsolete "+1 -2n" spelling
  selects the same field, but current POSIX-conforming versions of
  "sort" take "+1" as a file name and fail:

    foreach ekind ( old new )
      set ifile = ".roman-${ekind}.nums"
      set ofile = ".roman-${ekind}-length.cts"
      echo "${ifile} -> dat/${ofile}"
      cat ${ifile} \
        | gawk '//{ print length($1)-1; }' \
        | sort | uniq -c | expand \
        | sort -b -k2,2n \
        > dat/${ofile}
    end

  Make sure all samples have an appropriate element factoring filter
  "factor-text-standard.gawk" (the commands below are disabled):

    # set trivialfilter = "/home/staff/stolfi/voynich/work/factor-text-trivial.gawk"
    # foreach sample ( ${samples} )
    #   set dir = "sample/${sample}"
    #   set filter = "${dir}/factor-text-standard.gawk"
    #   if ( ! ( -x ${filter} ) ) then
    #     ( cd ${dir} && ln -s ${trivialfilter} ${filter:t} )
    #   endif
    # end

  >>> STOPPED HERE <<<

  Extracting sets of Voynichese words of same length.  For each sample
  ("text", "labs"), factoring kind ("basic", "oko") and element count,
  the loop below filters "lang/${sample}/voyn.wfr" through the
  factoring pipeline, keeps fields 1 and 4 of each non-empty line,
  selects the words with the requested element count, sorts them by
  the second field, and writes the result to "dat/${ofile}".

  The final "wc" now reads "dat/${ofile}", the file that was just
  written; it previously read "${ofile}" without the "dat/" prefix.
  The sort key is spelled "-k2,2" instead of the obsolete "+1 -2".

  NOTE(review): the progress message names "fig/${ofile}" while the
  output is written under "dat/"; confirm which directory is intended.

    foreach sample ( text labs )
      foreach ekind ( basic oko )
        foreach len ( 01 02 03 09 10 11 )
          set ofile = "lang/${sample}/voyn-${ekind}-${len}.cts"
          echo "=== fig/${ofile} ==="
          cat lang/${sample}/voyn.wfr \
            | capitalize-ligatures -v field=3 \
            | factor-field-general \
                -f factor-text-${ekind}.gawk -v inField=3 -v outField=4 \
            | gawk '/./{ print $1, $4; }' \
            | extract-words-by-elem-count -v len=${len} \
            | sort -b -k2,2 \
            > dat/${ofile}
          wc dat/${ofile}
        end
      end
    end