# Hacking at the Voynich manuscript - Side notes
# 109 Computing the token entropy
# Last edited on 2001-01-18 04:55:37 by stolfi
#
# NOTE: the commands below are written in csh/tcsh syntax
# (set, foreach ... end, the :r and :e variable modifiers);
# they will not run under sh or bash.

# INTRODUCTION
#
#   In this note we compute the first-order token entropy
#   for Voynichese and other languages.

# SETTING UP THE ENVIRONMENT
#
#   Links:

ln -s ../../compute-cum-cum-freqs
ln -s ../../compute-cum-freqs
ln -s ../../compute-freqs
ln -s ../../combine-counts
ln -s ../../compute-entropy
ln -s ../../remove-freqs
ln -s ../../totalize-fields
ln -s ../../select-units
ln -s ../../words-from-evt
ln -s ../../format-counts-packed

ln -s ../100/data
ln -s ../101/lang

#   Paper directories:

set tbldir = "/home/staff/stolfi/papers/voynich-words/techrep/tables/auto"
set figdir = "/home/staff/stolfi/papers/voynich-words/techrep/figures/auto"

# ENTROPIES OF TOKENS
#
#   For each (word kind, language) pair, feed the first column of
#   lang/LANG/KIND/gud.wfr to compute-entropy and append one TeX
#   macro definition per pair to the output table, which is then
#   moved into the paper table directory.
#   NOTE(review): the first column of gud.wfr is presumably the
#   token count - confirm against the file format.

set ofile = "token-entropies.tex"; echo "${ofile}"
# Start from an empty table, since the loop below appends to it.
/bin/rm -f ${ofile}
foreach kf ( text.voyn labs.voyn text.engl text.latn )
  # kf has the form KIND.LANG: :r keeps the root, :e the extension.
  set wkind = "${kf:r}"; set lang = "${kf:e}"
  # Non-empty lines only; gawk reads the file directly (no cat needed).
  gawk '/./{ print $1; }' lang/${lang}/${wkind}/gud.wfr \
    | compute-entropy \
    > ".tmp"
  set entropy = ( `cat .tmp` )
  echo "${kf}: ${entropy}"
  # Emits \def\tkentropyKINDLANG{VALUE}.  The data goes in as %s
  # arguments so that a stray % or backslash in it is never
  # interpreted as part of the printf format.
  printf '\\def\\tkentropy%s%s{%s}\n' "${wkind}" "${lang}" "${entropy}" >> ${ofile}
end
# Do not leave the scratch file behind in the working directory.
/bin/rm -f .tmp
# -b keeps a backup of any previous table, -v reports the move.
/bin/mv -bv ${ofile} ${tbldir}/