#! /bin/csh -f 
# Last edited on 1999-12-05 15:59:18 by stolfi

# Usage text printed when the script is not given exactly two arguments.
set usage = "$0 FTAG SPLITCHAR"

# Tabulates the pairs of prefixes and suffixes from a frequency
# file of word feature FTAG.  Assumes available the files
#
#   stats-subsecs/FTAG/tot.frq 
#     counts and frequencies of the FTAG values over all sections,
#
#   FTAG-prefs.dic, FTAG-suffs.dic,
#   FTAG-prefs-len.dic, FTAG-suffs-len.dic
#     lists of official prefixes and suffixes, for tabulation
#     (the "-len" files are used for the letter-smashed table)
#
#   smash-FTAG-letters
#     a script that reads a file of counts and FTAGs,
#     and collapses each `letter' of the FTAG
#     to a single "X" (preserving the SPLITCHAR if any).
#
# This script considers only words of the frequency file that contain
# exactly one instance of the given SPLITCHAR, and separates those words
# into a "prefix" and a "suffix" at that character. Outputs two files,
# stats-subsecs/FTAG/tot-pref-suff.mtx with the counts of
# prefix/suffix combinations, and a similar table
# stats-subsecs/FTAG/tot-pref-suff-len.mtx where the prefixes and
# suffixes have been filtered through smash-FTAG-letters.
#

# --- Argument parsing -------------------------------------------------
# Exactly two arguments are required: the feature tag and the split
# character.  Anything else prints the usage line and exits with status 1.
if ( $#argv != 2 ) then
  echo "usage: ${usage}"
  exit 1
endif

# FTAG: selects the stats-subsecs/FTAG directory and the dictionary files.
set ftag = "$1"; shift;
# SPLITCHAR: the character at which words are cut into prefix and suffix.
set splitchar = "$1"; shift;

# NOTE(review): "tmp" is not referenced anywhere else in this script, and
# /tmp/$$ is a predictable name; if temporary files are ever needed here,
# create them with mktemp instead.  Kept only in case other tooling
# relies on the variable being set.
set tmp = "/tmp/$$"

# Input: the frequency file for this feature tag.
set ifile = "stats-subsecs/${ftag}/tot.frq"

# Refuse to continue when the input file is missing or unreadable.
# The operand is quoted so that an FTAG containing blanks or glob
# characters is tested as a single file name.
if ( ! ( -r "${ifile}" ) ) then
  echo "${ifile} not found"
  exit 1
endif

# --- Tabulation -------------------------------------------------------
# Builds one output table per loop item.  Each item packs two values into
# a single word, separated by its last ".":
#   root (":r")      -> the filter command inserted into the pipeline
#   extension (":e") -> the tag appended to dictionary and output names
# so that
#   "cat."                     gives filter "cat" and an empty tag,
#   "smash-FTAG-letters.-len"  gives filter "smash-FTAG-letters", tag "-len".
# NOTE(review): an FTAG that itself contains "." would make ":r"/":e"
# split the second item at the wrong place.
foreach cp ( cat.  smash-${ftag}-letters.-len )
  set ccmd = "${cp:r}"
  set ctag = "${cp:e}"

  # Dictionary of prefixes for this variant; passed below as "cols".
  set prefs = "${ftag}-prefs${ctag}.dic"
  if ( ! ( -r "${prefs}" ) ) then
    echo "${prefs} not found"; exit 1
  endif

  # Dictionary of suffixes for this variant; passed below as "rows".
  set suffs = "${ftag}-suffs${ctag}.dic"
  if ( ! ( -r "${suffs}" ) ) then
    echo "${suffs} not found"; exit 1
  endif

  # Pipeline stages, in order:
  #  1. keep fields 1 and 3 of every non-empty line of the frequency file
  #     (presumably the count and the FTAG value -- confirm against the
  #     tot.frq format);
  #  2. run the filter for this variant (cat, or smash-FTAG-letters);
  #  3. keep only lines that contain SPLITCHAR ...
  #  4. ... and drop lines that contain it more than once;
  #  5. replace SPLITCHAR by "- -", which cuts the word into two fields,
  #     "PREFIX-" and "-SUFFIX";
  #  6. reorder the fields to: field 1, suffix, prefix;
  #  7. hand the result to count-diword-freqs, which writes the table.
  gawk '/./{print $1, $3;}' "${ifile}" \
    | ${ccmd} \
    | egrep '['"${splitchar}"']' \
    | egrep -v '['"${splitchar}"'].*['"${splitchar}"']' \
    | sed -e 's/['"${splitchar}"']/- -/g' \
    | gawk '/./{print $1, $3, $2;}' \
    | count-diword-freqs \
        -v counted=1 \
        -v digits=5 \
        -v rows="${suffs}" \
        -v cols="${prefs}" \
    > "stats-subsecs/${ftag}/tot-pref-suff${ctag}.mtx"
end