#! /bin/csh -f
# Last edited on 2000-01-22 21:08:59 by stolfi

set usage = "$0 FTAG"

# Tabulates the features found by "analyze-features",
# sorted into categories.  Assumes the following files are available:
#
#   text-subsecs/all.names
#     a list of names of subsections
#
#   text-subsecs/XXX.evt
#     the text of subsection XXX (where XXX is found in all.names),
#     in EVMT format.
#
#   classify-FTAG-strings
#     a filter that takes a stream of Voynichese words, one per line,
#     with counts as in "uniq -c"; and outputs the same file,
#     sorted by arbitrary categories.  Also adds at the end
#     of each category a line of the form "# NNNNN TOTAL".
#
#   stats-subsecs/FTAG/XXX.frq
#     counts and frequencies of the FTAG values in section XXX,
#     for each XXX.  Also the overall counts and frequencies
#     when XXX="tot".
#
# Creates the following files:
#
#   stats-subsecs/FTAG/XXX-cls.frq
#     same as XXX.frq, with entries sorted and subtotalized
#     by category, through classify-FTAG-strings.
#
# NOTE: the code below only processes XXX="tot" (tot.frq -> tot-cls.frq);
# it reads neither text-subsecs/all.names nor the per-subsection files.

# Exactly one argument (the feature tag) is required.
if ( $#argv != 1 ) then
  echo "usage: ${usage}"; exit 1
endif

set ftag = "$1"

if ( ! ( -d "stats-subsecs/${ftag}" ) ) then
  echo "directory stats-subsecs/${ftag} not found."
  exit 1
endif

# Classify the entries by the classifier script, if present:

set frqfile = "stats-subsecs/${ftag}/tot.frq"
set clsfile = "stats-subsecs/${ftag}/tot-cls.frq"
set cscript = "classify-${ftag}-strings"

if ( ! ( -r "${frqfile}" ) ) then
  echo "file ${frqfile} not found."
  exit 1
endif

# The classifier is looked up in the current directory; the pipeline
# below must therefore run that same file (see "./" there).
if ( ! ( -x "${cscript}" ) ) then
  echo "script ${cscript} not found."
  exit 1
endif

echo "${frqfile} -> ${clsfile}"

# For every non-empty input line, keep field 1 (printed as a 7-wide
# integer count) and field 3; field 2 is dropped (presumably the old
# frequency, recomputed by compute-freqs -- TODO confirm).
# The classifier is invoked as "./${cscript}" so that the file actually
# executed is the one whose presence was tested above, regardless of
# whether "." is in the command search path.
gawk '/./{printf "%7d %s\n", $1, $3;}' < "${frqfile}" \
  | "./${cscript}" \
  | compute-freqs \
  > "${clsfile}"