I translated the unit files to the ECC encoding: mkdir L16-ecc foreach f ( L16/f[0-9]* ) echo "$f -> L16-ecc/${f:t}" cat ${f} \ | fsg2ecc \ > L16-ecc/${f:t} end Let's collect the textual units that contain text or labels of each hand: cat L16/page-table.dir \ | egrep '^[^:]*:[^:]*:[^:]*:[^:]*:[^:]*(labels|words):' \ > .units-labels.dir cat L16/page-table.dir \ | egrep '^[^:]*:[^:]*:[^:]*:[^:]*:[^:]*parags:' \ > .units-parags.dir cat L16/page-table.dir \ | egrep '^[^:]*:[^:]*:[^:]*:[^:]*:[^:]*(lines|titles):' \ > .units-lines.dir foreach let ( A B X ) set pat = "${let}" if ( "${let}" == "X" ) set pat = '[^:]*\?' cat L16/page-table.dir \ | egrep ':'"${pat}"':[^:]*:[^:]*(labels|words):' \ > .units-labels-${let}.dir cat L16/page-table.dir \ | egrep ':'"${pat}"':[^:]*:[^:]*parags:' \ > .units-parags-${let}.dir cat L16/page-table.dir \ | egrep ':'"${pat}"':[^:]*:[^:]*(lines|titles):' \ > .units-lines-${let}.dir end Let's gather all panel numbers that occur in the text. cat L16/page-table.dir \ | sed -e 's/:.*//g' -e 's/\..*$//g' \ | uniq \ > .panels.dir cat L16/page-table.dir \ | egrep '^[^:]*:[^:]*:[^:]*:[^:]*:[^:]*(labels|words):' \ | sed -e 's/:.*//g' -e 's/\..*$//g' \ > .panels-labels.dir cat L16/page-table.dir \ | egrep '^[^:]*:[^:]*:[^:]*:[^:]*:[^:]*parags:' \ | sed -e 's/:.*//g' -e 's/\..*$//g' \ > .panels-parags.dir cat L16/page-table.dir \ | egrep '^[^:]*:[^:]*:[^:]*:[^:]*:[^:]*(lines|titles):' \ | sed -e 's/:.*//g' -e 's/\..*$//g' \ > .panels-lines.dir From them we create a script to convert panel numbers to sequential page numbers (000 to 263): foreach f ( '' '-parags' '-labels' '-lines' ) echo '#\! /n/gnu/bin/sed -f' \ > panel${f}-to-page cat .panels${f}.dir \ | gawk 'BEGIN {pg=0} /./ {printf"s/<%s>/<%03d>/g\n", $1, pg; pg++}' \ >> panel${f}-to-page chmod a+x panel${f}-to-page end --- panel-to-page ------------------------ #! /n/gnu/bin/sed -f s//<000>/g s//<001>/g s//<002>/g s//<003>/g s//<004>/g s//<005>/g s//<006>/g s//<007>/g ... 
s//<260>/g s//<261>/g s//<262>/g s//<263>/g s//<264>/g s//<265>/g s//<266>/g ------------------------------------------ Hm... In order to make the results easier to describe, I should rename the textual unit files with standard panel numbers (like f77v) rather than modern page numbers (like 114).