# Last edited on 2002-01-17 03:53:05 by stolfi
# Token/word length distributions for reference languages
#
# Makefile for computing the token and word length
# histograms for "gud" subset of a reference language sample,
# in terms of a specified factoring into elements.
#
# Entry points:
#   single-sampelem  (needs LANG, BOOK, ELEM) builds the element-count
#                    file and then re-invokes this makefile once for
#                    tokens (TKWD=t) and once for words (TKWD=w).
#   single-hist      (needs LANG, BOOK, ELEM, TKWD) builds one length
#                    histogram and the TeX macro with its average.
#
# Each required variable is given a placeholder value below; a
# definition on the make command line overrides that assignment, and
# only then is the corresponding guarded section parsed.

# Every file target here is written through a "> file" redirection, so
# a failing recipe would otherwise leave a truncated file that looks
# up to date.  This also makes a failed "update-paper-include" step get
# retried on the next run.
.DELETE_ON_ERROR:

.PHONY: all
all:
	@echo "make what?"

PAPER_DIR := /home/staff/stolfi/papers/voynich-stats/techrep
TBL_DIR := ${PAPER_DIR}/tables/auto
FIG_DIR := ${PAPER_DIR}/figures/auto

######################################################################
# Client must define
#   ${LANG} = "engl", "chin", etc.;
#   ${BOOK} = "wow", "red", etc.;
#   ${ELEM} = "trivial", "viqr", etc. (element factorization);
#

LANG := LANG.IS.UNDEFINED
ifneq "${LANG}" "LANG.IS.UNDEFINED"
BOOK := BOOK.IS.UNDEFINED
ifneq "${BOOK}" "BOOK.IS.UNDEFINED"
ELEM := ELEM.IS.UNDEFINED
ifneq "${ELEM}" "ELEM.IS.UNDEFINED"

SUBDIR := ${LANG}/${BOOK}/tot.t

WFR_FILE := sample/${SUBDIR}/gud.wfr
CTS_FILE := sample/${SUBDIR}/gud-fact-${ELEM}.cts

FACTOR_AWK := factor-text-${ELEM}.gawk

# Builds the element-count file, then runs the "single-hist" target of
# this same makefile for TKWD=t and TKWD=w.  The "|| exit 1" makes the
# loop stop at the first failing sub-make; without it only the status
# of the last iteration would be reported.  ELEM is forwarded
# explicitly, like LANG and BOOK, since the inner section requires it.
.PHONY: single-sampelem
single-sampelem: ${CTS_FILE}
	for tkwd in t w; do \
	  ${MAKE} LANG=${LANG} BOOK=${BOOK} ELEM=${ELEM} TKWD=$$tkwd \
	    -f other-length-hists.make single-hist \
	  || exit 1; \
	done

# Factors field 3 of the .wfr file into field 4 using ${FACTOR_AWK},
# keeps fields 1, 3 and 4, and pipes them through compute-elem-counts.
# The helper scripts and this makefile are listed as prerequisites so
# that editing any of them forces a rebuild.
${CTS_FILE}: ${WFR_FILE} \
          factor-field-general ${FACTOR_AWK} \
          compute-elem-counts other-length-hists.make
	@echo "${WFR_FILE} -> ${CTS_FILE}"
	cat ${WFR_FILE} \
	  | factor-field-general \
	      -f ${FACTOR_AWK} -v inField=3 -v outField=4 \
	  | gawk '//{ print $$1, $$3, $$4; }' \
	  | compute-elem-counts \
	  > ${CTS_FILE}

######################################################################
# Caller must define
#   ${LANG} = "engl", "chin", etc.;
#   ${BOOK} = "wow", "red", etc.;
#   ${ELEM} = "trivial", "viqr", etc. (element factorization);
#   ${TKWD} = "t" (tokens) or "w" (words).
#

TKWD := TKWD.IS.UNDEFINED
ifneq "${TKWD}" "TKWD.IS.UNDEFINED"

CTS_FILE := sample/${SUBDIR}/gud-fact-${ELEM}.cts
LHI_FILE := sample/${SUBDIR}/gud-fact-${ELEM}-${TKWD}.lhi
AVG_TEX := sample/${SUBDIR}/gud-fact-${ELEM}-${TKWD}-avlen.tex
AVG_TEX_EXP := ${TBL_DIR}/${SUBDIR}/gud-fact-${ELEM}-${TKWD}-avlen.tex

.PHONY: single-hist
single-hist: ${LHI_FILE} ${AVG_TEX}

# For TKWD=t the first field of each .cts line is passed through
# unchanged; for any other value it is replaced by 1 (presumably so
# that each distinct word is counted once -- confirm against the
# output format of compute-elem-counts).  The result is displayed
# after it is written.
${LHI_FILE}: ${CTS_FILE} \
          compute-elem-count-distrib \
          other-length-hists.make
	@echo "${CTS_FILE} -> ${LHI_FILE}"
	cat ${CTS_FILE} \
	  | gawk -v tkwd="${TKWD}" \
	      '/./{ print (tkwd == "t" ? $$1 : 1), $$3, $$4; }' \
	  | compute-elem-count-distrib \
	  > ${LHI_FILE}
	cat ${LHI_FILE}

# Writes a one-line TeX macro definition whose value is the mean of
# field 1 weighted by field 2 over all non-comment, non-empty lines of
# the histogram (presumably field 1 is a length and field 2 its
# frequency -- confirm against compute-elem-count-distrib).  The macro
# name is \<LANG><BOOK>Avg<Tk|Wd>N<ELEM>.  An empty histogram makes
# gawk abort on the division by zero, which fails the recipe.  The
# file is then displayed and exported to ${AVG_TEX_EXP} by
# update-paper-include.
${AVG_TEX}: ${LHI_FILE}
	cat ${LHI_FILE} \
	  | gawk \
	      -v lg=${LANG} -v bk=${BOOK} -v ek=${ELEM} -v tw=${TKWD} \
	      ' /^[#]/{next;} \
	        /./{ t+= $$2; e += $$2*$$1; } \
	        END { \
	          xtw = ( tw == "t" ? "Tk" : "Wd" ); \
	          printf "\\def\\%s%sAvg%sN%s{%.2f}\n", lg, bk, xtw, ek, e/t; \
	        } \
	      ' \
	  > ${AVG_TEX}
	cat ${AVG_TEX}
	update-paper-include ${AVG_TEX} ${AVG_TEX_EXP}

endif
# End of ${LANG}/${BOOK}/${ELEM}/${TKWD} recursion
######################################################################

endif
endif
endif
# End of ${LANG}/${BOOK}/${ELEM} section
######################################################################