Hacking at the Voynich manuscript - Side notes
024 Creating a new interim release of Landini's interlinear in EVA

Last edited on 1998-12-28 09:05:03 by stolfi

All commands below are csh/tcsh commands, meant to be run from the
directory that holds (a link to) "L16+H-eva".  Lines ending in "\"
are continued on the next line.

GENERAL RECIPE

  Gathering the data:

    ln -s ../../L16+H-eva

  Parameters (release tag, today's date as yy-mm-dd, export directory):

    set ver = "16e6"
    set dt = `date +%y-%m-%d`
    set exdir = "/n/ftp/pub/staff/stolfi/EXPORT/projects/voynich/${dt}-interln${ver}"

  Creating the export directory:

    mkdir ${exdir}

  List of units (field 2 of every non-"#" line of the UNITS index):

    /bin/rm -f .all.units
    cat L16+H-eva/UNITS \
      | gawk -v FS=':' '/^[^#]/{printf "L16+H-eva/%s\n", $2;}' \
      > .all.units

  Checking for missing units in index (the diff should be empty):

    ls -d L16+H-eva/f* \
      | grep -E '^L16\+H-eva/f[0-9]+[rv]?[0-6]?(|[.][A-Za-z0-9]+)$' \
      | sort \
      > .all.files
    cat .all.units \
      | sort \
      > .all.srtun
    diff .all.{files,srtun}

  Validating format:

    foreach file ( `cat .all.units` )
      set fu = "${file:t}"
      echo '=== '$file' ==='
      validate-new-evt-format \
        -v fnum="${fu:r}" \
        -v unit="${fu:e}" \
        ${file}
    end

  List of units that contain only comments (field 6 of UNITS is "-"):

    cat L16+H-eva/UNITS \
      | gawk -v FS=':' '($6=="-"){printf "L16+H-eva/%s\n", $2;}' \
      > .cmt.units

  Checking for mislabeled comment files (the diff should be empty):

    ls -d L16+H-eva/f* \
      | grep -E '^L16\+H-eva/f[0-9]+[rv][0-6]?$' \
      | sort \
      > .cmt.files
    cat .cmt.units \
      | sort \
      > .cmt.srtun
    diff .cmt.{files,srtun}

  Unit index file:

    cp -p L16+H-eva/UNITS L16+H-eva/unit${ver}.txt
    cp -p L16+H-eva/unit${ver}.txt ${exdir}/

  Archives with separate file for each unit:

    tar cvf - L16+H-eva/UNITS `cat .all.units` \
      | gzip \
      > ${exdir}/arch${ver}.tgz
    rm -f ${exdir}/arch${ver}.zip
    zip -klv ${exdir}/arch${ver} L16+H-eva/UNITS `cat .all.units`

  All the text in a single file (the leading "## " is stripped from
  lines of the form "## <...>" whose locator has no ".", and blank
  lines are dropped):

    cat `cat .all.units` \
      | sed -e '/^## *<[^<>.]*>/s/^## *//' \
      | grep -E -v '^ *$' \
      > L16+H-eva/text${ver}.evt
    cp -p L16+H-eva/text${ver}.evt ${exdir}/
    rm -f ${exdir}/text${ver}.evt.gz
    gzip ${exdir}/text${ver}.evt
    rm -f ${exdir}/text${ver}.zip
    zip -klv ${exdir}/text${ver} L16+H-eva/text${ver}.evt

  Edits made to Landini's original version:

    cat L16+H-eva/f0.{M,S,U,V,W} \
      > ${exdir}/edit${ver}.txt

  Comments only, separate files:

    tar cvf - L16+H-eva/UNITS `cat .cmt.units` \
      | gzip \
      > ${exdir}/acmt${ver}.tgz
    rm -f ${exdir}/acmt${ver}.zip
    zip -klv ${exdir}/acmt${ver} L16+H-eva/UNITS `cat .cmt.units`

  Comments only, single file:

    cat `cat .cmt.units` \
      | sed -e '/^## *<[^<>.]*>/s/^## *//' \
      | grep -E -v '^ *$' \
      > L16+H-eva/tcmt${ver}.evt
    cp -p L16+H-eva/tcmt${ver}.evt ${exdir}/
    rm -f ${exdir}/tcmt${ver}.evt.gz
    gzip ${exdir}/tcmt${ver}.evt
    rm -f ${exdir}/tcmt${ver}.zip
    zip -klv ${exdir}/tcmt${ver} L16+H-eva/tcmt${ver}.evt

  Listing differences (against the previous export, named by
  "exold" and "old"):

    set exold = "/n/ftp/pub/staff/stolfi/EXPORT/projects/voynich/98-10-20-interln16e5"
    set old = 16e5

    cat ${exdir}/text${ver}.evt.gz \
      | gunzip \
      > .new.evt
    cat ${exold}/text${old}.evt.gz \
      | gunzip \
      > .old.evt
    diff .old.evt .new.evt \
      | prettify-diff-output \
      > .diffs.evt

  Comparing UNITS files (field 1 is blanked before the comparison):

    cat L16+H-eva/UNITS \
      | gawk -v FS=':' -v OFS=':' \
          '//{$1=""; print;}' \
      > .new.units
    cat ${exold}/unit${old}.txt \
      | gawk -v FS=':' -v OFS=':' \
          '//{$1=""; print;}' \
      > .old.units
    diff .old.units .new.units \
      | prettify-diff-output \
      > .diffs.units

  Checking new transcriptions per page (lines containing ";H>" are
  left out; the remaining lines are sorted on their first and third
  blank-separated fields; "-k1,1 -k3,3" is the POSIX spelling of the
  obsolete "sort +0 -1 +2 -3"):

    foreach f ( new old )
      echo $f
      cat .${f}.evt \
        | grep -E '^<[^<>]*;' \
        | grep -E -v ';H>' \
        | sed \
            -e 's/<\(f[0-9][0-9]*[rv][0-6]*\)[.][^<>]*[;]/<\1 /' \
            -e 's/[-\!%=.,]//g' \
            -e 's/{[^{}]*}//g' \
            -e 's/[ ][ ][ ]*/ /g' \
        | sort -k1,1 -k3,3 \
        > .${f}.esr
    end
    diff .old.esr .new.esr \
      | prettify-diff-output \
      > .diffs.esr

SPECIAL HACKS

  Inserting the unit locator line at the top of each file (only
  files with a unit extension are touched; the previous version is
  kept as "FILE~", and an older "FILE~", if any, as "FILE~~"):

  NOTE(review): the "tr" below turns every CR into a newline, so the
  sed replacement text presumably ended in a literal CR (^M) right
  after the "{}" instead of a blank -- confirm against the original
  notes before running this.

    pushd L16+H-eva
    foreach f ( `ls | grep -E '^f[0-9]+[rv]?[0-6]?(|[.][A-Za-z0-9]+)$'` )
      echo '=== '${f}' ==='
      set fnum = "${f:r}"
      set unit = "${f:e}"
      if ( "x${unit}" != "x" ) then
        if ( -r ${f}~ ) mv ${f}~ ${f}~~
        cat ${f} \
          | sed -e '1s/^/## <'"${f}"'> {} /' \
          | tr '\015' '\012' \
          > "${f}@"
        mv -i "${f}" "${f}~" && mv -i "${f}@" "${f}"
      endif
    end

  Undoing the above (moves each "FILE~" backup back over "FILE";
  each name is visited once, and files without a backup are skipped):

    foreach f ( `ls | sed -e 's/~$//' | grep -E '^f[0-9]+[rv]?[0-6]?(|[.][A-Za-z0-9]+)$' | sort -u` )
      echo '=== '${f}' ==='
      if ( -r ${f}~ ) mv -i "${f}~" "$f"
    end
    popd