# Last edited on 2000-10-11 07:02:52 by stolfi

Obtaining component statistics (simple words only), without braces:

Each entry of ${statargs} has the form TAGPAIR/PART.SELTYPE.  The inner
loop takes it apart with the csh modifiers :h (directory part), :t (file
part), :r (root) and :e (extension), and hands the pieces to gather-stats
once for every section in ${secs} plus "txt.n" and "lab.n".  (${secs} must
already be set when this is run.)

  set statargs = ( \
    tc-y/prefix.pm     tc-y/core.c \
    tc-y/suffix.ns     tc-y/crustmantle.pmns \
    tf-z/precrust.p    tf-z/premantle.m \
    tf-z/sufmantle.n   tf-z/sufcrust.s \
    tf-z/mantle.mn     tf-z/crust.ps \
  )

  foreach sec ( ${secs} txt.n lab.n )
    echo "${sec}"
    foreach dpt ( ${statargs} )
      set pt = "${dpt:t}"
      set tagpair = "${dpt:h}"
      set seltype = "${pt:e}"
      set part = "${pt:r}"
      gather-stats ${sec} ${tagpair} ${seltype} ${part}
    end
  end

Listing it all:

For "txt.n" and "lab.n" the frequency files are the "tot" ones (under
stats/text and stats/labs respectively) and no .wsp file is listed; for
every other section the section's own .wsp and .frq files are used.

  foreach sec ( "hea.1" "bio.1" "str.2" "txt.n" "lab.n" )
    switch ( "${sec}" )
      case txt.n:
        set wdtype = "text"
        set xsec = "tot"
        set wspfile = ( )
        breaksw
      case lab.n:
        set wdtype = "labs"
        set xsec = "tot"
        set wspfile = ( )
        breaksw
      default:
        set wdtype = "text"
        set xsec = "${sec}"
        set wspfile = ( "data/gud/text/${xsec}.wsp" )
        breaksw
    endsw
    dicio-wc \
      ${wspfile} \
      stats/${wdtype}/{tc-y,tf-z,tw-w}/${xsec}.frq \
      prob/obs/${sec}/*.frq
  end

  dicio-wc prob/obs/*/word.frq

lines words bytes file ------- ------- --------- ------------ 6703 6703 103030 data/gud/text/hea.1.wsp 1039 3117 27061 stats/text/tc-y/hea.1.frq 608 1824 14230 stats/text/tf-z/hea.1.frq 1833 5499 54265 stats/text/tw-w/hea.1.frq 22 66 430 prob/obs/hea.1/core.frq 305 915 6511 prob/obs/hea.1/crust.frq 695 2085 15369 prob/obs/hea.1/crustmantle.frq 20 60 421 prob/obs/hea.1/mantle.frq 60 180 1167 prob/obs/hea.1/precrust.frq 68 204 1354 prob/obs/hea.1/prefix.frq 15 45 305 prob/obs/hea.1/premantle.frq 187 561 3888 prob/obs/hea.1/sufcrust.frq 254 762 5426 prob/obs/hea.1/suffix.frq 13 39 262 prob/obs/hea.1/sufmantle.frq 1980 5940 45165 prob/obs/hea.1/word.frq lines words bytes file ------- ------- --------- ------------ 6555 6555 104974 data/gud/text/bio.1.wsp 744 2232 19346 stats/text/tc-y/bio.1.frq 421 1263 9719 stats/text/tf-z/bio.1.frq 1240 3720 36567 stats/text/tw-w/bio.1.frq 14 42 271 prob/obs/bio.1/core.frq 218 654 4594 prob/obs/bio.1/crust.frq 529 1587 11690
prob/obs/bio.1/crustmantle.frq 12 36 246 prob/obs/bio.1/mantle.frq 50 150 968 prob/obs/bio.1/precrust.frq 67 201 1335 prob/obs/bio.1/prefix.frq 13 39 261 prob/obs/bio.1/premantle.frq 106 318 2153 prob/obs/bio.1/sufcrust.frq 134 402 2777 prob/obs/bio.1/suffix.frq 10 30 198 prob/obs/bio.1/sufmantle.frq 1325 3975 30134 prob/obs/bio.1/word.frq lines words bytes file ------- ------- --------- ------------ 10097 10097 170197 data/gud/text/str.2.wsp 1451 4353 38269 stats/text/tc-y/str.2.frq 786 2358 18476 stats/text/tf-z/str.2.frq 2533 7599 75981 stats/text/tw-w/str.2.frq 22 66 430 prob/obs/str.2/core.frq 394 1182 8428 prob/obs/str.2/crust.frq 974 2922 21800 prob/obs/str.2/crustmantle.frq 24 72 510 prob/obs/str.2/mantle.frq 66 198 1282 prob/obs/str.2/precrust.frq 76 228 1515 prob/obs/str.2/prefix.frq 19 57 389 prob/obs/str.2/premantle.frq 253 759 5297 prob/obs/str.2/sufcrust.frq 379 1137 8248 prob/obs/str.2/suffix.frq 20 60 417 prob/obs/str.2/sufmantle.frq 2779 8337 64286 prob/obs/str.2/word.frq lines words bytes file ------- ------- --------- ------------ 2996 8988 79819 stats/text/tc-y/tot.frq 1561 4683 37317 stats/text/tf-z/tot.frq 5820 17460 175716 stats/text/tw-w/tot.frq 35 105 691 prob/obs/txt.n/core.frq 881 2643 19124 prob/obs/txt.n/crust.frq 2146 6438 48366 prob/obs/txt.n/crustmantle.frq 36 108 773 prob/obs/txt.n/mantle.frq 136 408 2700 prob/obs/txt.n/precrust.frq 178 534 3645 prob/obs/txt.n/prefix.frq 27 81 564 prob/obs/txt.n/premantle.frq 475 1425 10030 prob/obs/txt.n/sufcrust.frq 698 2094 15203 prob/obs/txt.n/suffix.frq 26 78 546 prob/obs/txt.n/sufmantle.frq 6604 19812 153703 prob/obs/txt.n/word.frq lines words bytes file ------- ------- --------- ------------ 519 1557 13325 stats/labs/tc-y/tot.frq 420 1260 9893 stats/labs/tf-z/tot.frq 671 2013 19655 stats/labs/tw-w/tot.frq 22 66 430 prob/obs/lab.n/core.frq 181 543 3949 prob/obs/lab.n/crust.frq 281 843 6253 prob/obs/lab.n/crustmantle.frq 12 36 246 prob/obs/lab.n/mantle.frq 44 132 854 prob/obs/lab.n/precrust.frq 
40 120 785 prob/obs/lab.n/prefix.frq 9 27 179 prob/obs/lab.n/premantle.frq 158 474 3335 prob/obs/lab.n/sufcrust.frq 193 579 4152 prob/obs/lab.n/suffix.frq 13 39 263 prob/obs/lab.n/sufmantle.frq 721 2163 16460 prob/obs/lab.n/word.frq

TRIVIAL GRAMMARS

Creating "trivial" (straight enumeration) grammars:

For every section and every part this writes gram/trivial/SEC/PART.grx,
consisting of two "#" header lines, then the part name in upper case
followed by a colon, then the contents of prob/obs/SEC/PART.frq.

  set parts = ( core premantle sufmantle mantle precrust sufcrust crust prefix suffix word )

  foreach sec ( hea.1 bio.1 str.2 lab.n txt.n )
    foreach part ( ${parts} )
      set gram = "gram/trivial/${sec}/${part}.grx"
      echo ${gram}
      mkdir -p ${gram:h}
      echo '# Last edited on DATE TIME by USER' > ${gram}
      echo '# Trivial grammar for '"${part}" >> ${gram}
      echo "${part}"':' | tr 'a-z' 'A-Z' >> ${gram}
      cat prob/obs/${sec}/${part}.frq >> ${gram}
    end
  end

Trivial check of the enumeration procedure:

The generated distributions are written to prob/gen/CLASS/SEC/PART.prb,
so that directory is created down to the section level before the loop
(the redirection fails if it does not exist).  "mkdir -p" is used so the
commands can be re-run without error.  NOTE(review): whether compare-probs
needs a per-section directory under prob/cmp cannot be told from here;
only the class-level directory is created, as before.

  set sec = "bio.1"
  set class = "trivial"
  mkdir -p prob/gen/${class}/${sec}
  mkdir -p prob/cmp/${class}
  foreach part ( ${parts} )
    echo ${class}/${sec}/${part}
    set grxfile = "gram/${class}/${sec}/${part}.grx"
    set genfile = "prob/gen/${class}/${sec}/${part}.prb"
    enum-language < ${grxfile} > ${genfile}
    compare-probs ${class} ${sec} ${part} ${part}
  end