#! /bin/csh -f
# Last edited on 1998-07-14 23:19:48 by stolfi

set usage = "$0 NAME INI FIN SYMBOL..."

# Applies the Reeds compression cycle several times
# starting from file NAME-INI.sig until NAME-FIN.sig
# (where INI and FIN are 2-digit numbers).
# Uses the list of SYMBOLs as replacements.
#
# Each cycle (from stage N to stage M = N+1):
#   1. counts the digraphs of NAME-N.sig into NAME-N.cts,
#      most frequent first;
#   2. takes the next unused SYMBOL and records the chosen
#      replacement in NAME-M.dic;
#   3. replaces the most frequent digraph by that SYMBOL,
#      writing NAME-M.sig;
#   4. rebuilds the plain text NAME-M.txt from NAME-M.sig.
#
# Requires "gather-tuples" and "replace-signif-digraph" in the path.
# One SYMBOL is consumed per cycle, so at least FIN-INI are needed.

if ( $#argv < 3 ) then
  echo "usage: ${usage}"; exit 1
endif

# SYMBOLs and digraphs may contain "*", "?" or "[": never let the
# shell filename-expand them.  Nothing in this script needs globbing.
set noglob

set name = "$1"; shift;
set ini = "$1"; shift;
set fin = "$1"; shift;
set symbs = ( $* )
set nsymbs = $#symbs

# csh expressions (and printf "%d") read a leading "0" as octal, so
# "08" and "09" are rejected as numbers.  Use "expr", which is
# strictly decimal, to obtain plain integers for the arithmetic; the
# zero-padded form is used only to build file names.
set k = `expr "${ini}" + 0`
set kfin = `expr "${fin}" + 0`
if ( $#k != 1 || $#kfin != 1 ) then
  echo "INI and FIN must be numbers"
  echo "usage: ${usage}"; exit 1
endif

# Fail before touching any file if the SYMBOL list is too short.
@ ncycles = ${kfin} - ${k}
if ( ${ncycles} > ${nsymbs} ) then
  echo "need ${ncycles} replacement symbols, got only ${nsymbs}"
  echo "usage: ${usage}"; exit 1
endif

set n = `printf "%02d" ${k}`
while ( ${k} < ${kfin} )

  if ( ! -r "${name}-${n}.sig" ) then
    echo "cannot read ${name}-${n}.sig"; exit 1
  endif

  echo " "
  echo " counting digraphs in ${name}-${n}.sig..."
  # Tuples containing the filler "_" are discarded; the result has
  # one "COUNT DIGRAPH" line per distinct digraph, sorted by
  # decreasing COUNT.
  cat "${name}-${n}.sig" \
    | gather-tuples \
        -v order=2 \
        -v filler='_' \
        -v lowercase=0 \
    | grep -v '_' \
    | sort | uniq -c | expand \
    | sort -k1,1nr \
    > "${name}-${n}.cts"

  # top[1] = count, top[2] = the most frequent digraph.
  set top = ( `head -n 1 "${name}-${n}.cts"` )
  if ( $#top < 2 ) then
    echo "no digraphs found in ${name}-${n}.sig"; exit 1
  endif

  # Number and zero-padded name of the next stage.
  @ knext = ${k} + 1
  set m = `printf "%02d" ${knext}`

  # Take the next replacement symbol off the front of the list.
  set s = "$symbs[1]"; shift symbs

  printf "# %s = %s %7d\n" "$s" "$top[2]" "$top[1]" > "${name}-${m}.dic"
  printf " %s = %s %7d\n" "$s" "$top[2]" "$top[1]"

  echo " replacing ${top[2]} by $s in ${name}-${n}.sig..."
  cat "${name}-${n}.sig" \
    | replace-signif-digraph "$top[2]" "$s" \
    > "${name}-${m}.sig"

  echo " reconstructing ${name}-${m}.txt..."
  # Drop the first character of every line, join all lines, then
  # turn each CR (octal 015) into a newline.
  cat "${name}-${m}.sig" \
    | sed -e 's/^.//' \
    | tr -d '\012' \
    | tr '\015' '\012' \
    > "${name}-${m}.txt"

  @ k = ${knext}
  set n = "${m}"
end