Obsolete stuff
Last edited on 1999-07-28 01:54:26 by stolfi

ANOTHER SET OF KEYS FOR THE SCATTER PLOTS

  Let's make another dictionary with words whose frequencies differ
  substantially between clusters.

  The commands below are csh/tcsh.  Sort keys are given in the "-k"
  form, because the older "+POS -POS" form is no longer accepted by
  current versions of "sort".

  The subsections to be compared:

    set bigsecs = ( bio.1 cos.1 hea.1 heb.1 pha.2 str.2 zod.1 )

  For each encoding (RAW, EQV) and each unordered pair of distinct
  subsections {sa,sb}, run "compute-freq-diffs" on the two ".frq"
  files and save its output, sorted by decreasing first field, in
  "{sa}-{sb}.dfr".  The inner list "tmpsecs" loses its first element
  at each step of the outer loop, so that each pair is visited only
  once and no subsection is paired with itself.  (The output of
  "compute-freq-diffs" is presumably lines of the form "DIFF WORD";
  that is how the later steps read it -- to be confirmed.)

    foreach etag ( RAW EQV )
      set tmpsecs = ( ${bigsecs} )
      foreach sa ( ${bigsecs} )
        set tmpsecs = ( $tmpsecs[2-] )
        foreach sb ( ${tmpsecs} )
          echo "${etag}: ${sa} - ${sb}"
          compute-freq-diffs ${etag}/wfreqs/subsecs/{${sa},${sb}}.frq \
            | sort -k1,1gr \
            > ${etag}/wfreqs/subsecs/${sa}-${sb}.dfr
        end
      end
    end

  For each encoding, collect into a scratch file, for every pair and
  every word, the square of 1000 times the first field of the ".dfr"
  line, printed as an integer.  Words containing "?" or "*", and
  words that consist of a single character optionally followed by
  "~", are skipped; so are entries whose squared value prints as 0.
  The scratch file is then sorted by word and fed to
  "combine-counts" (which presumably adds up the values of equal
  words -- to be confirmed).  Each resulting value V is converted to
  sqrt(V/npairs)/1000, where "npairs" is the number of pairs
  processed, and the 50 words with the largest result are saved in
  "keys.dsq".  The scratch file is removed with "-f" because it does
  not exist on the first run.

    foreach etag ( RAW EQV )
      set tmpdsq = /tmp/temp-${etag}.dsq
      /bin/rm -f ${tmpdsq}
      /bin/rm -rf ${etag}/plots/maxd
      mkdir -p ${etag}/plots/maxd
      set tmpsecs = ( ${bigsecs} )
      set npairs = 0
      foreach sa ( ${bigsecs} )
        set tmpsecs = ( $tmpsecs[2-] )
        foreach sb ( ${tmpsecs} )
          @ npairs = ${npairs} + 1
          echo "${etag}: ${sa} - ${sb}"
          cat ${etag}/wfreqs/subsecs/${sa}-${sb}.dfr \
            | gawk \
                ' (($2 \!~ /[?*]/) &&($2 \!~ /^.[~]?$/)){ \
                    dd = 1000*$1; dd = dd*dd; printf "%7d %s\n", dd, $2;}' \
            | grep -E -v ' 0 ' \
            >> ${tmpdsq}
        end
      end
      cat ${tmpdsq} \
        | sort -k2,2 \
        | combine-counts \
        | gawk \
            -v np=${npairs} \
            '/./{ printf "%8.5f %s\n", sqrt($1/np)/1000, $2;}' \
        | sort -k1,1gr \
        | head -50 \
        > ${etag}/plots/maxd/keys.dsq
    end

  Finally, extract the words alone (second field of "keys.dsq"),
  sort them, save them as "keys.dic", and list the result:

    foreach etag ( RAW EQV )
      cat ${etag}/plots/maxd/keys.dsq \
        | gawk '/./{print $2;}' | sort \
        > ${etag}/plots/maxd/keys.dic
      list -filter 'fmt -w60' ${etag}/plots/maxd/keys.dic
    end

    --- RAW/plots/maxd/keys.dic ------------------------
    aiin al ar chckhy chdy chedy cheey cheol chey chol chor chy
    cthy daiin dain dal dar dol dy lchedy okaiin okal okar
    okedy okeey okeol ol or otaiin otedy oteey oteody otey
    qokaiin qokain qokal qokar qokedy qokeedy qokeey qokeol
    qoky qol sh shedy shey sho shol shor shy
    ----------------------------------------------------

    --- EQV/plots/maxd/keys.dic ------------------------
    chctho~ chdo~ chectho~ chedo~ cheedo~ cheeo~ cheol~ cheor~
    cheo~ cheto~ choin~ chol~ chor~ cho~ ch~ ctheo~ cthol~
    cthor~ ctho~
doin~ dol~ dor~ do~ lchedo~ oin~ ol~ or~ otchdo~ otchedo~ otchol~ otchor~ otcho~ otedo~ oteedo~ oteeo~ oteodo~ oteol~ oteor~ oteos~ oteo~ otod~ otoin~ otolo~ otol~ otor~ oto~ sol~ tcho~ tedo~ tor~ ----------------------------------------------------