# Last edited on 2002-04-12 10:53:47 by stolfi

# Creates the VMS samples.
#
# This makefile calls itself recursively, selecting progressively
# narrower groups of rules through command-line variables:
#
#   ACTION                  -> "everything"    (loops over all books)
#   ACTION, BOOK, SUBSECS   -> "single-book"   (loops over subsections)
#   ACTION, BOOK, SUBSEC    -> "single-subsec" (builds one sample)
#
# Each of these variables is given a "*.IS.UNDEFINED" default below;
# a value given on the command line overrides that default and
# enables the corresponding conditional section.

MAKEFILE := vms-edition.make

# ${MAKERULES} is listed as a prerequisite of the derived files, so
# they are remade whenever this makefile changes.  The empty
# definition is kept as a commented-out alternative (presumably to
# switch that dependency off while editing the rules):
# MAKERULES :=
MAKERULES := ${MAKEFILE}

LANG := voyn

# Remove a target whose recipe failed, so that a truncated file
# written through ">" is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# The phony "pre"/"recurse"/"pos" phases below rely on their
# prerequisites being processed left to right; do not run them in
# parallel.
.NOTPARALLEL:

.PHONY: all source export clean

# Trap spurious "make"s:
all: source

# "make source" creates the LaTeX source file:
source: work
	${MAKE} -R -f ${MAKEFILE} ACTION=source everything

# Remove derived files from "dat":
clean:
	${MAKE} -R -f ${MAKEFILE} ACTION=clean everything

# "mkdir -p" so that missing parent directories are created too.
dat/${LANG}: ; mkdir -p $@
exp/${LANG}: ; mkdir -p $@

# Two-column table made of fields 2 and 6 of the colon-separated
# file ${UNIT_TBL}; it is passed to "select-units" further below.
UTYPE_TBL := unit-to-type.tbl

# NOTE(review): ${UNIT_TBL} is not defined anywhere in this file; it
# has to come from the command line or the environment.  The guard
# keeps "cat" from silently reading standard input when it is empty.
${UTYPE_TBL}: ${UNIT_TBL}
	@test -n "${UNIT_TBL}" \
	  || { echo "UNIT_TBL is not set; cannot build ${UTYPE_TBL}" 1>&2; exit 1; }
	cat ${UNIT_TBL} \
	  | gawk -v FS=":" '/./{print $$2,$$6}' \
	  > ${UTYPE_TBL}

######################################################################
# Rules for a given ${ACTION} ("source", "export", "clean")
#
ACTION := ACTION.IS.UNDEFINED
ifneq "${ACTION}" "ACTION.IS.UNDEFINED"

# The various "books" (actually views of the same book, the VMS).
# The "sectioned" ones are worth analyzing separately by subsection.
#
SECTIONED_BOOKS := maj prs lab
UNSECTIONED_BOOKS := tak ini fin mid

.PHONY: everything ev-recurse ev-pos-${ACTION}

# Subsection tags, read once when the makefile is parsed.
VMS_SUBSECS := ${shell cat subsections.tags}

everything: ev-recurse ev-pos-${ACTION}

# Run ${ACTION} on every book.  Sectioned books get the full list of
# subsections; unsectioned ones get an empty list (only "tot.1" is
# then processed).  A failing sub-make aborts the loop.
ev-recurse:
	for book in ${SECTIONED_BOOKS}; do \
	  ${MAKE} -R -f ${MAKEFILE} \
	    BOOK=$$book \
	    SUBSECS="${VMS_SUBSECS}" \
	    ACTION=${ACTION} single-book \
	    || exit 1; \
	done
	for book in ${UNSECTIONED_BOOKS}; do \
	  ${MAKE} -R -f ${MAKEFILE} \
	    BOOK=$$book \
	    SUBSECS="" \
	    ACTION=${ACTION} single-book \
	    || exit 1; \
	done

# For each subsection, print the sum of the first field of the
# "raw.wfr" files of "prs" plus "lab", and then that of "maj"
# (presumably so that the two totals can be compared by eye).
ev-pos-source:
	for sec in ${VMS_SUBSECS}; do \
	  printf "\n%-32s" "voyn/{prs,lab}/$$sec/raw.wfr: "; \
	  cat ${foreach B,prs lab,dat/${LANG}/${B}/$$sec/raw.wfr} \
	    | gawk '/./{t += $$1;} END{print t}' ; \
	  printf "%-32s" "voyn/maj/$$sec/raw.wfr: " ; \
	  cat dat/${LANG}/maj/$$sec/raw.wfr \
	    | gawk '/./{t += $$1;} END{print t}' ; \
	done

ev-pos-export:

ev-pos-clean:

######################################################################
# Rules for given ${ACTION}/${BOOK} where
#   ${BOOK} = book to make ("prs", "lab", "maj", etc.)
#
BOOK := BOOK.IS.UNDEFINED
ifneq "${BOOK}" "BOOK.IS.UNDEFINED"

BOOK_DIR := ${LANG}/${BOOK}

dat/${BOOK_DIR}: ; mkdir -p $@
exp/${BOOK_DIR}: ; mkdir -p $@

BOOK_SUBSEC_LIST := ${BOOK_DIR}/subsections.tags
BOOK_SUBSEC_OK_LIST := ${BOOK_DIR}/subsections-ok.tags

DAT_BOOK_SUBSEC_LISTS := dat/${BOOK_SUBSEC_LIST} dat/${BOOK_SUBSEC_OK_LIST}

######################################################################
# Rules for given ${ACTION}/${BOOK}/${SUBSECS} where
#   ${SUBSECS} = blank-separated list of subsection samples to
#     create for that book, excluding "tot.1".
#
SUBSECS := SUBSECS.IS.UNDEFINED
ifneq "${SUBSECS}" "SUBSECS.IS.UNDEFINED"

BOOK_RGB_CTS := ${BOOK_DIR}/raw-gud-bad-tw-counts
BOOK_SUMM := ${BOOK_DIR}/raw-gud-bad-tw-summary

DAT_BOOK_TARGETS := \
  dat/${BOOK_RGB_CTS}.tex \
  dat/${BOOK_SUMM}.tex

.PHONY: single-book sb-recurse sb-pre-${ACTION} sb-pos-${ACTION}
.PHONY: sb-show-sizes sb-check-subsec-total

single-book: sb-pre-${ACTION} sb-recurse sb-pos-${ACTION}

# Run ${ACTION} on each listed subsection and then on "tot.1".
# A failing sub-make aborts the loop.
sb-recurse: dat/${BOOK_DIR} exp/${BOOK_DIR}
	for subsec in ${SUBSECS} tot.1; do \
	  ${MAKE} -R -f ${MAKEFILE} \
	    BOOK=${BOOK} \
	    SUBSEC=$$subsec \
	    ACTION=${ACTION} single-subsec \
	    || exit 1; \
	done

sb-pre-source: ${DAT_BOOK_SUBSEC_LISTS}

sb-pos-source: sb-show-sizes sb-check-subsec-total ${DAT_BOOK_TARGETS}

# Report the sizes of the per-subsection files with "dicio-wc".
sb-show-sizes:
	@dicio-wc ${foreach S,${SUBSECS} tot.1,dat/${BOOK_DIR}/${S}/raw.evt}
	@dicio-wc ${foreach S,${SUBSECS} tot.1,dat/${BOOK_DIR}/${S}/raw.lts}
	@dicio-wc ${foreach S,${SUBSECS} tot.1,dat/${BOOK_DIR}/${S}/raw.tks}
	@dicio-wc ${foreach S,${SUBSECS} tot.1,dat/${BOOK_DIR}/${S}/raw.wfr} \
	  | gawk '/./{ printf " %8s %s\n", $$1,$$4;}'
	@dicio-wc ${foreach S,${SUBSECS} tot.1,dat/${BOOK_DIR}/${S}/gud.wfr} \
	  | gawk '/./{ printf " %8s %s\n", $$1,$$4;}'
	@dicio-wc ${foreach S,${SUBSECS} tot.1,dat/${BOOK_DIR}/${S}/bad.wfr} \
	  | gawk '/./{ printf " %8s %s\n", $$1,$$4;}'

# Print the sum of the first field over all subsection "raw.wfr"
# files, followed by the same sum for "tot.1".  "/dev/null" keeps
# "cat" from reading standard input when ${SUBSECS} is empty.
sb-check-subsec-total:
	@printf "\n%-24s" "dat/${BOOK_DIR}/*/raw.wfr: "
	@cat ${foreach S,${SUBSECS},dat/${BOOK_DIR}/${S}/raw.wfr} /dev/null \
	  | gawk '/./{t += $$1;} END{print t}'
	@printf "%-24s" "dat/${BOOK_DIR}/tot.1/raw.wfr: "
	@cat dat/${BOOK_DIR}/tot.1/raw.wfr \
	  | gawk '/./{t += $$1;} END{print t}'

sb-pre-export:

sb-pos-export:
	update-paper-include dat/${BOOK_RGB_CTS}.tex exp/${BOOK_RGB_CTS}.tex
	update-paper-include dat/${BOOK_SUMM}.tex exp/${BOOK_SUMM}.tex

sb-pre-clean:

# Remove the two subsection lists created by "sb-pre-source".
sb-pos-clean:
	-rm -f ${DAT_BOOK_SUBSEC_LISTS}

# One subsection tag per line.  The directory is an order-only
# prerequisite: it must exist, but its timestamp (which changes
# whenever a file is created inside it) must not force a rebuild.
dat/${BOOK_SUBSEC_LIST}: ${MAKERULES} | dat/${BOOK_DIR}
	echo "${SUBSECS}" \
	  | tr ' ' '\012' \
	  > dat/${BOOK_SUBSEC_LIST}

# Same list without the tags that begin with "unk" or "xxx".
dat/${BOOK_SUBSEC_OK_LIST}: dat/${BOOK_SUBSEC_LIST}
	cat dat/${BOOK_SUBSEC_LIST} \
	  | grep -E -v '^(unk|xxx)' \
	  > dat/${BOOK_SUBSEC_OK_LIST}

# NOTE(review): the directory is kept as a normal prerequisite here
# because it is also handed to the script as its input; the files
# the script actually reads are not visible from this makefile.
dat/${BOOK_RGB_CTS}.txt: ${MAKERULES} dat/${BOOK_DIR} \
    count-raw-gud-bad-toks-wrds
	@echo " "
	@echo " Good/bad statistics for ${BOOK_DIR}:"
	@echo " "
	count-raw-gud-bad-toks-wrds \
	    dat/${BOOK_DIR} ${SUBSECS} / tot.1 \
	  > dat/${BOOK_RGB_CTS}.txt
	cat dat/${BOOK_RGB_CTS}.txt \
	  | sed -e 's:/::g' -e 's/^/ /'

dat/${BOOK_RGB_CTS}.tex: ${MAKERULES} dat/${BOOK_RGB_CTS}.txt \
    tex-format-raw-gud-bad-counts
	@echo " "
	@echo " dat/${BOOK_RGB_CTS}.txt -> dat/${BOOK_RGB_CTS}.tex"
	@echo " "
	cat dat/${BOOK_RGB_CTS}.txt \
	  | tex-format-raw-gud-bad-counts \
	  > dat/${BOOK_RGB_CTS}.tex

dat/${BOOK_SUMM}.tex: ${MAKERULES} dat/${BOOK_RGB_CTS}.txt \
    tex-format-raw-gud-bad-summary
	@echo " "
	@echo " dat/${BOOK_RGB_CTS}.txt -> ${BOOK_SUMM}.tex"
	@echo " "
	cat dat/${BOOK_RGB_CTS}.txt \
	  | tex-format-raw-gud-bad-summary \
	      -v sample=${LANG}${BOOK} \
	  > dat/${BOOK_SUMM}.tex

endif
# End of ${ACTION}/${BOOK}/${SUBSECS} rules
######################################################################

######################################################################
# Rules for given ${ACTION}/${BOOK}/${SUBSEC} where
#   ${SUBSEC} is a specific subsection (possibly "tot.1").
#
SUBSEC := SUBSEC.IS.UNDEFINED
ifneq "${SUBSEC}" "SUBSEC.IS.UNDEFINED"

SUBSEC_DIR := ${BOOK_DIR}/${SUBSEC}

dat/${SUBSEC_DIR}: ; mkdir -p $@
exp/${SUBSEC_DIR}: ; mkdir -p $@

PRS_UTYPES := parags,starred-parags,circular-lines,circular-text,radial-lines,titles
LAB_UTYPES := labels,words

# Define ${SOURCE_EVT} and ${TRANS_TAG}:

ifeq "${BOOK}" "tak"
  ifeq "${SUBSEC}" "tot.1"
    SOURCE_EVT := work/L16+H-eva/text16e6.evt
  else
    SOURCE_EVT := SOURCE_EVT.NOT.DEFINED
  endif
  TRANS_TAG := H
else
  ifeq "${SUBSEC}" "tot.1"
    SOURCE_EVT := work/Notes/045/only-m.evt
  else
    SOURCE_EVT := work/Notes/045/subsecs-m/${SUBSEC}.evt
  endif
  TRANS_TAG := A
endif

# Define ${UTYPES}, ${OWN_EVT} and ${LINE_SEL}:
#   UTYPES   = unit types given to "select-units";
#   OWN_EVT  = "YES" if the book builds its own "raw.evt",
#              "NO" if it reuses the one of the "prs" book;
#   LINE_SEL = extra options given to "words-from-evt".

ifeq "${BOOK}" "tak"
  UTYPES := ${PRS_UTYPES},${LAB_UTYPES}
  OWN_EVT := YES
  LINE_SEL :=
endif

ifeq "${BOOK}" "maj"
  UTYPES := ${PRS_UTYPES},${LAB_UTYPES}
  OWN_EVT := YES
  LINE_SEL :=
endif

ifeq "${BOOK}" "prs"
  UTYPES := ${PRS_UTYPES}
  OWN_EVT := YES
  LINE_SEL :=
endif

ifeq "${BOOK}" "lab"
  UTYPES := ${LAB_UTYPES}
  OWN_EVT := YES
  LINE_SEL :=
endif

ifeq "${BOOK}" "ini"
  UTYPES := ${PRS_UTYPES}
  OWN_EVT := NO
  LINE_SEL := -v omitMedial=1 -v omitFinal=1
endif

ifeq "${BOOK}" "mid"
  UTYPES := ${PRS_UTYPES}
  OWN_EVT := NO
  LINE_SEL := -v omitInitial=1 -v omitFinal=1
endif

ifeq "${BOOK}" "fin"
  UTYPES := ${PRS_UTYPES}
  OWN_EVT := NO
  LINE_SEL := -v omitInitial=1 -v omitMedial=1
endif

ifeq "${OWN_EVT}" "YES"

# Create a private copy of the EVT file, with specified
# units and subsection, converting all weirdos to basic
# EVA chars, or to "*" if impossible.

RAW_EVT := dat/${SUBSEC_DIR}/raw.evt

${RAW_EVT}: ${SOURCE_EVT} ${MAKERULES} \
    basify-weirdos select-units ${UTYPE_TBL}
	@echo "${SOURCE_EVT} -> ${RAW_EVT}"
	cat ${SOURCE_EVT} \
	  | grep -E -v '[;][^'"${TRANS_TAG}"'][>]' \
	  | sed -e 's/[&][*!][*!][*!][*!;]/*!!!!/g' \
	  | basify-weirdos \
	  | select-units \
	      -v types="${UTYPES}" \
	      -v table=${UTYPE_TBL} \
	  > ${RAW_EVT}

else

# Use the EVT file previously created for the "prs" book,
# and this same subsection.  Assumes that the "prs"
# EVT file contains the same units as ${BOOK} should.
#
# NOTE(review): because "make-prs" is phony, ${RAW_EVT} and all the
# files derived from it are treated as out of date on every run.

.PHONY: make-prs

RAW_EVT := dat/${LANG}/prs/${SUBSEC}/raw.evt

${RAW_EVT}: make-prs

make-prs:
	${MAKE} -R -f ${MAKEFILE} ACTION=source \
	  BOOK=prs SUBSEC=${SUBSEC} ${RAW_EVT}

endif

# Extract raw token stream with locations from EVT file.  The gawk
# filter writes three fields per line: for a blank input line, the
# last location seen, "1" and "="; otherwise the input's first
# field, "2" and the input's second field.

RAW_LTS := dat/${SUBSEC_DIR}/raw.lts

${RAW_LTS}: ${RAW_EVT} ${MAKERULES} \
    words-from-evt
	@echo "${RAW_EVT} -> ${RAW_LTS}"
	cat ${RAW_EVT} \
	  | words-from-evt \
	      -v showParags=1 \
	      ${LINE_SEL} \
	      -v showLocation=1 \
	  | gawk \
	      ' BEGIN { c = "f0.P0.0"; } \
	        /^ *$$/{ print c, "1", "="; next; } \
	        /./{ c = $$1; print $$1, "2", $$2; next; } \
	      ' \
	  > ${RAW_LTS}

# Extract raw token stream without locations:

RAW_TKS := dat/${SUBSEC_DIR}/raw.tks

${RAW_TKS}: ${RAW_LTS} ${MAKERULES}
	@echo "${RAW_LTS} -> ${RAW_TKS}"
	cat ${RAW_LTS} \
	  | gawk '/./ { print $$3; }' \
	  > ${RAW_TKS}

# Count raw word occurrences and compute their rel. frequencies.
# Lines containing "=" are dropped first.  The counts are sorted by
# decreasing count, then by word; "-k" replaces the obsolete
# "+0 -1nr +1 -2" key syntax, which current "sort" rejects.

RAW_WFR := dat/${SUBSEC_DIR}/raw.wfr

${RAW_WFR}: ${RAW_TKS} ${MAKERULES} \
    compute-freqs
	@echo "${RAW_TKS} -> ${RAW_WFR}"
	cat ${RAW_TKS} \
	  | grep -E -v '=' \
	  | sort | uniq -c | expand \
	  | sort -b -k1,1nr -k2,2 \
	  | compute-freqs \
	  > ${RAW_WFR}

# Extract the good words:

GUD_WFR := dat/${SUBSEC_DIR}/gud.wfr

${GUD_WFR}: ${RAW_WFR} ${MAKERULES} \
    select-good-words
	@echo "${RAW_WFR} -> ${GUD_WFR}"
	cat ${RAW_WFR} \
	  | select-good-words -v inField=3 -v writeBad=0 \
	  > ${GUD_WFR}

# Extract the bad words:

BAD_WFR := dat/${SUBSEC_DIR}/bad.wfr

${BAD_WFR}: ${RAW_WFR} ${MAKERULES} \
    select-good-words
	@echo "${RAW_WFR} -> ${BAD_WFR}"
	cat ${RAW_WFR} \
	  | select-good-words -v inField=3 -v writeBad=1 \
	  > ${BAD_WFR}

# Files built (and removed by "clean") for this subsection.  The EVT
# copy is included only when it belongs to this book; a shared "prs"
# copy is left to the "prs" book itself.

DERIVED_FILES := ${RAW_TKS} ${RAW_LTS} ${RAW_WFR} ${GUD_WFR} ${BAD_WFR}
ifeq "${OWN_EVT}" "YES"
  DERIVED_FILES := ${RAW_EVT} ${DERIVED_FILES}
endif

.PHONY: single-subsec ss-${ACTION}

single-subsec: dat/${SUBSEC_DIR} exp/${SUBSEC_DIR} ss-${ACTION}

ss-source: ${DERIVED_FILES}

ss-export:

ss-clean:
	-rm -f ${DERIVED_FILES}

endif
# End ${ACTION}/${BOOK}/${SUBSEC} rules
######################################################################

endif
# End ${ACTION}/${BOOK} rules
######################################################################

endif
# End ${ACTION} rules
######################################################################