#! /bin/csh -f
# Last edited on 2000-10-11 18:57:33 by stolfi

set usage = "$0 [ -truncate NUM ] GRCLASS SECTION GRAMMAR"

# Compares the probabilities generated by a given probabilistic
# GRAMMAR against observed word frequencies, and plots the probability
# comparison graph for the two.
#
# The parameter "-truncate" is a probability value that can be used to
# prune the enumeration of the grammar (mandatory when the grammar is
# recursive).
#
# Input files:
#
#   gram/GRCLASS/SECTION/GRAMMAR.grx
#     The probabilistic grammar
#
#   prob/obs/SECTION/GRAMMAR.frq
#     The observed word frequencies
#
# Output files:
#
#   prob/gen/GRCLASS/SECTION/GRAMMAR.prb
#     Word probabilities as predicted by the grammar.
#
#   prob/cmp/GRCLASS/SECTION/GRAMMAR.pr2
#   prob/cmp/GRCLASS/SECTION/GRAMMAR.gif
#     Probability comparison listings and plots.
#
# Exit status: 0 on success; 1 on a usage error, when the grammar file
# cannot be read, or when any of the processing steps fails.

# --- Option parsing -------------------------------------------------
# Options are the leading arguments that begin with "-".  The "/"
# prefix on both sides of each comparison keeps an argument that starts
# with "-" from being mistaken for an operator inside the expression.
set truncopt = ( )
while ( ( $#argv > 0 ) && ( "/$1" =~ /-* ) )
  if ( ( $#argv >= 2 ) && ( "/$1" == "/-truncate" ) ) then
    # Handed on to enum-language below as: -v truncate=NUM
    set truncopt = ( -v "truncate=$2" ) ; shift; shift;
  else
    echo "unknown option "'"'"$1"'"'
    echo "usage: ${usage}"; exit 1
  endif
end

# Exactly three positional arguments must remain.
if ( $#argv != 3 ) then
  echo "usage: ${usage}"; exit 1
endif

set grclass = "$1"; shift;
set sec = "$1"; shift;
set gram = "$1"; shift;

# --- File names -----------------------------------------------------
set grxfile = "gram/${grclass}/${sec}/${gram}.grx"
# NOTE(review): obsfile is not referenced anywhere below; presumably
# compare-probs derives the same path from its arguments -- confirm.
set obsfile = "prob/obs/${sec}/${gram}.frq"
set genfile = "prob/gen/${grclass}/${sec}/${gram}.prb"
set cmpfile = "prob/cmp/${grclass}/${sec}/${gram}.pr2"

# --- Step 1: enumerate the language of the grammar ------------------
# Check the input explicitly.  When the grammar was fed through a pipe
# from cat, a missing file could go unnoticed: the status tested below
# could be that of enum-language reading an empty input.
if ( ! -r "${grxfile}" ) then
  echo "cannot read ${grxfile}"
  echo "aborted"; exit 1
endif

echo "enumerating language of ${grxfile}"
enum-language ${truncopt} < "${grxfile}" > "${genfile}"
if ( $status != 0 ) then
  echo "aborted"; exit 1
endif

# --- Step 2: compare generated and observed probabilities -----------
# The grammar name is passed twice, as in the original invocation.
compare-probs ${grclass} ${sec} ${gram} ${gram}
# Stop here on failure, like the other steps, instead of going on to
# report on a comparison that did not complete.
if ( $status != 0 ) then
  echo "aborted"; exit 1
endif

# --- Step 3: summarize the generated and comparison files -----------
# NOTE(review): presumably dicio-wc prints word counts -- confirm.
dicio-wc "${genfile}" "${cmpfile}"
if ( $status != 0 ) then
  echo "aborted"; exit 1
endif