#! /bin/csh -f
# Last edited on 2000-10-11 14:32:15 by stolfi

set usage = "$0 [ -truncate NUM ] [ -maxtrees NUM ] CLASS SECTION GRAMMAR"

# Compares the probabilities generated by a given prob. GRAMMAR
# against observed word frequencies, and plots the probability
# comparison graph for the two.  Then uses the observed frequencies to
# adjust the rule probabilities, producing a new grammar GRAMMAR-a, and
# compares it too against the observed data.
#
# The parameter "-truncate" is a probability value that can be used to
# prune the enumeration of the grammar (mandatory when the grammar is
# recursive).  The parameter "-maxtrees" is passed to the grammar
# adjustment routine.
#
# Input files:
#
#   gram/CLASS/SECTION/GRAMMAR.grx
#     The probabilistic grammar
#
#   prob/obs/SECTION/GRAMMAR.frq
#     The observed word frequencies
#
# Output files:
#
#   gram/CLASS/SECTION/GRAMMAR-a.grx
#     Adjusted grammar.
#
#   prob/gen/CLASS/SECTION/GRAMMAR.prb
#   prob/gen/CLASS/SECTION/GRAMMAR-a.prb
#     Predicted probabilities for the
#     original and adjusted grammar.
#
#   prob/cmp/CLASS/SECTION/GRAMMAR.pr2
#   prob/cmp/CLASS/SECTION/GRAMMAR.gif
#   prob/cmp/CLASS/SECTION/GRAMMAR-a.pr2
#   prob/cmp/CLASS/SECTION/GRAMMAR-a.gif
#     Probability comparison listings and plots.
#
# NOTE(review): this script itself only writes GRAMMAR-a.grx and the
# GRAMMAR-a.frq symlink; the .prb, .pr2 and .gif files are presumably
# produced by the check-grammar command invoked below (confirm).
#
# Exit status: 1 on a usage error or when any step fails, after
# printing either the usage line or the word "aborted".

# ----------------------------------------------------------------------
# Option parsing.  Leading arguments that begin with a dash are options.
# Both operands of each comparison get a slash prefix so that an
# argument starting with a dash is never taken for an operator.

set maxtrees = 1
set truncopt = ( )
while ( ( $#argv > 0 ) && ( "/$1" =~ /-* ) )
  if ( ( $#argv >= 2 ) && ( "/$1" == "/-maxtrees" ) ) then
    set maxtrees = "$2" ; shift; shift;
  else if ( ( $#argv >= 2 ) && ( "/$1" == "/-truncate" ) ) then
    set truncopt = ( "-truncate" "$2" ) ; shift; shift;
  else
    # Also reached when a known option is the last argument and
    # therefore has no value.
    echo "unknown option "'"'"$1"'"'
    echo "usage: ${usage}"; exit 1
  endif
end

# ----------------------------------------------------------------------
# Positional arguments: CLASS SECTION GRAMMAR.

if ( $#argv < 3) then
  echo "usage: ${usage}"; exit 1
endif

set class = "$1"; shift;
set sec = "$1"; shift;
set old = "$1"; shift;
# Any remaining arguments.  Not used in the visible part of this script.
set parts = ( $* )

set oldgrx = "gram/${class}/${sec}/${old}.grx"
set oldobs = "prob/obs/${sec}/${old}.frq"

# ----------------------------------------------------------------------
# Check the original grammar.
# The truncopt list is left unquoted on purpose: it must expand to zero
# or two separate words.

check-grammar ${truncopt} "${class}" "${sec}" "${old}"
if ($status != 0) then
  echo "aborted"; exit 1
endif

# ----------------------------------------------------------------------
# Names for the adjusted grammar.

set new = "${old}-a"
set newgrx = "gram/${class}/${sec}/${new}.grx"
set newobs = "prob/obs/${sec}/${new}.frq"

# The adjusted grammar is compared against the same observations, so
# make GRAMMAR-a.frq a symbolic link to GRAMMAR.frq if it is not
# already readable.  Abort if the link cannot be created instead of
# continuing without it.
if ( ! -r "${newobs}" ) then
  ( cd "${newobs:h}" && ln -s "${old}.frq" "${new}.frq" )
  if ($status != 0) then
    echo "aborted"; exit 1
  endif
endif

# ----------------------------------------------------------------------
# Produce the adjusted grammar from the original one and the observed
# word frequencies.

cat "${oldgrx}" \
  | parse-and-tally \
      -v wordcounts="${oldobs}" \
      -v ignorecounts=1 \
      -v maxtrees="${maxtrees}" \
      -v countprec=2 \
  > "${newgrx}"
if ($status != 0) then
  echo "aborted"; exit 1
endif

# ----------------------------------------------------------------------
# Check the adjusted grammar the same way as the original.

check-grammar ${truncopt} "${class}" "${sec}" "${new}"
if ($status != 0) then
  echo "aborted"; exit 1
endif