#! /bin/csh -f
# Last edited on 2008-06-15 20:58:52 by stolfi

set usage = "$0 [-sort {NUM}] [-freqs|-cumFreqs|-remFreqs] [-totals] [-titles '{T1} ... {Tn} {Tw}'] [-widths '{W1} ... {Wn}'] [-maxlines {NUM}] {INFILE} ... > {OUTFILE}"

# Merges two or more count files into a single multi-column table.
#
# Assumes each line of each {INFILE} has fields {COUNT} {WORD}
# where {COUNT} is a number (integer or fraction).
#
# The files are combined by "join-counts", sorted, and then laid out
# by "format-multi-counts"; both helpers (and "get-max-field-width")
# are looked up in ${STOLFIHOME}/voynich/work, which is prepended to
# the search path.  STOLFIHOME must therefore be set in the environment.
#
# Options:
#
#   -sort {NUM}   Sorts the joined lines on field {NUM}, as a general
#                 number, in decreasing order (intended to be the {COUNT}
#                 of the {NUM}th file).  Without it, the joined lines are
#                 sorted on their first field in increasing order
#                 (intended to be by {WORD} -- this depends on the output
#                 layout of "join-counts", which is not visible here;
#                 TODO confirm).
#
#   -freqs, -cumFreqs, -remFreqs
#                 Pass "freqs=1", "cumFreqs=1", or "remFreqs=1" to
#                 "format-multi-counts", and fix the column width at 3
#                 instead of measuring the input files.  With "-freqs"
#                 the output shows frequencies ×999 instead of counts.
#                 If several are given, only the last one takes effect.
#
#   -totals       Passes "totals=1" to "format-multi-counts".
#
#   -titles '{T1} ... {Tn} {Tw}'
#                 Column titles, blank-separated: one per input file
#                 plus one more (so exactly 1 + number of files).
#
#   -widths '{W1} ... {Wn}'
#                 Minimum widths of the count columns, one per file.
#                 If fewer widths than files are given, the last width
#                 is used for the remaining files.  Default is 1.
#
#   -maxlines {NUM}
#                 Passed to "format-multi-counts" as "maxLines".
#                 Default is 999999999.
#
# WARNING: assumes the {WORD} does not contain blanks.
#
# NOTE: usage and error messages are written to standard output.

# Defaults for the option-controlled settings.
set tit = ( )            # Column titles (empty means none were given).
set minwds = ( 1 )       # Minimum column widths, per file.
set maxwd = 999          # 999 is a sentinel: "measure widths from the files".
set mxl = 999999999      # Value handed to the formatter as "maxLines".
# Default sort is on the first field.  "-k1,1" is the modern spelling of
# the obsolete "+0 -1", which current "sort" implementations reject.
set srt = ( sort -k1,1 )
set tot = ( )            # Extra formatter arguments for "-totals".
set frop = ( )           # Extra formatter arguments for the "-*freqs" options.

set path = ( ${STOLFIHOME}/voynich/work $path )

# Parse the leading options; stops at the first argument not starting with "-".
# An option that needs a value but is the last argument falls through to
# the "invalid option" branch.
while ( ( $#argv > 0 ) && ( "x$1" =~ x-* ) )
  if ( ( $#argv >= 1 ) && ( "x$1" == "x-totals" ) ) then
    set tot = ( -v totals=1 )
    shift
  else if ( ( $#argv >= 1 ) && ( "x$1" == "x-freqs" ) ) then
    set frop = ( -v freqs=1 )
    set maxwd = 3
    shift
  else if ( ( $#argv >= 1 ) && ( "x$1" == "x-cumFreqs" ) ) then
    set frop = ( -v cumFreqs=1 )
    set maxwd = 3
    shift
  else if ( ( $#argv >= 1 ) && ( "x$1" == "x-remFreqs" ) ) then
    set frop = ( -v remFreqs=1 )
    set maxwd = 3
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-titles" ) ) then
    # Unquoted on purpose: splits the single argument into one word per title.
    set tit = ( $2 )
    shift
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-widths" ) ) then
    # Splits the single argument into one word per width.
    set minwds = ( `echo $2` )
    shift
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-sort" ) ) then
    # Field {NUM} only, general-numeric ("g"), reversed ("r").
    set srt = ( sort -k${2},${2}gr )
    shift
    shift
  else if ( ( $#argv >= 2 ) && ( "x$1" == "x-maxlines" ) ) then
    set mxl = "$2"
    shift
    shift
  else
    echo "invalid option $1"
    echo "usage: ${usage}"
    exit 1
  endif
end

# At least one input file is required.
if ( $#argv == 0 ) then
  echo "usage: ${usage}"
  exit 1
endif

# Everything that remains is an input file name.
set files = ( $* )

# If titles were given, there must be one per file plus one extra.
@ foo = 1 + $#files
if (($#tit > 0) && ($#tit != $foo)) then
  echo "wrong num of titles $#tit (must be 1 + num files)"
  exit 1
endif

# Compute the width of the count column of each file:
# either the measured maximum width of field 1 (when "maxwd" still holds
# the sentinel 999) or the fixed "maxwd", but never less than the
# minimum width that applies to that file.
set wds = ( )
@ n = 0
while ($n < $#files)
  @ n = $n + 1
  if ( $maxwd == 999 ) then
    set wd = "`cat $files[$n] | get-max-field-width -v field=1`"
  else
    set wd = "$maxwd"
  endif
  # Use the n-th minimum width, or the last one if the list is too short.
  if ( $n <= $#minwds ) then
    set minwd = "$minwds[$n]"
  else
    set minwd = "$minwds[$#minwds]"
  endif
  if ( ${wd} < ${minwd} ) set wd = "${minwd}"
  set wds = ( ${wds} ${wd} )
end

# Join the files, sort the joined lines, and format the table to stdout.
# "titles" and "widths" are passed as single blank-separated strings.
join-counts ${files} \
  | ${srt} \
  | format-multi-counts \
      ${tot} \
      ${frop} \
      -v titles="${tit}" \
      -v maxLines="${mxl}" \
      -v widths="${wds}"