#! /bin/sed -f
# Last edited on 1998-07-27 00:46:31 by stolfi
#
# Usage: factor-OK < INFILE > OUTFILE
# Factors EVA strings according to the OKOKOKO paradigm
#
# The input must contain one string per line, without comments or blanks.
# The output will have the form {w1}{w2}...{wn} where
# each "w" is a non-null element of the extended OKOKOKO paradigm.
#
# Unreadable characters in the input ("?" or "*") are parsed as "{*}".
# Failures of the OKOKOKO paradigm are finessed by parsing any 
# offending character "c" as "{c?}".
#
#
# Letter folding: every multi-character EVA group is collapsed into a
# single capital letter, so that the parser below can treat each group
# as one character.
#
# Two-letter groups "ch", "sh" and "ee".  Substitution is left-to-right,
# so an odd run such as "eee" becomes "E" followed by a lone "e".
s|ch|C|g
s|sh|S|g
s|ee|E|g
#
# Platformed letters ("c" + main letter + "h").  These are folded before
# the half-platformed "c" + main letter groups below, which are their
# prefixes.
s|ckh|K|g
s|cth|T|g
s|cfh|F|g
s|cph|P|g
#
# The same main letters with an "i" in place of the leading "c".
s|ikh|G|g
s|ith|H|g
s|ifh|M|g
s|iph|N|g
#
# Half-platformed letters ("c" + main letter, with no closing "h").
s|ck|U|g
s|ct|V|g
s|cf|X|g
s|cp|Y|g
#
# Parsing loop.
#
# A scanning head "#" is appended to the string and then moved leftwards,
# one element per iteration.  Everything to the right of the head has
# already been factored into "{...}" elements; everything to its left is
# still unparsed.  Each rule below consumes the text just before the head
# and, if it matched, restarts the loop via "tx".
# The input must not itself contain "#".
s/$/#/
:x
# "O" element: the whole run of "a", "o", "y" that ends at the head.
s/\([aoy][aoy]*\)#/#{\1}/
tx
# "Q" element: a single "q".
s/\(q\)#/#{\1}/
tx
# "K" element: a main letter followed by an "e" or "h" complement.
s/\([CSEktfpKTFPGHMNd][eh]\)#/#{\1}/
tx
# "K" element: a main letter with no complement.
s/\([CSEktfpKTFPGHMNUVXYgubxv]\)#/#{\1}/
tx
# "K" element that may carry an "i" prefix: the letter is grouped with up
# to three "i"s that immediately precede it (longest prefix tried first).
# Each case is a single substitution, so no temporary state marker is put
# into the string; a literal "%" in the input therefore falls through to
# the unrecognized-character rule instead of being mistaken for a marker.
# NOTE(review): "g" is also in the single-letter class above, which is
# tried first, so "g" never reaches these rules and "ig" groups are never
# formed -- confirm which of the two classes "g" really belongs to.
s/\(iii[rlgmjnsd]\)#/#{\1}/
tx
s/\(ii[rlgmjnsd]\)#/#{\1}/
tx
s/\(i[rlgmjnsd]\)#/#{\1}/
tx
s/\([rlgmjnsd]\)#/#{\1}/
tx
# Unreadable characters ("*" or "?") become the element "{*}".
s/[*?]#/#{*}/
tx
# Any other character "c" that no rule accepted becomes "{c?}".
s/\(.\)#/#{\1?}/
tx
#
# No rule matched, so the head has reached the beginning of the line:
# remove it.
s/^#//
#
# Letter unfolding: expand each capital letter used during parsing back
# into the EVA group it stands for.
#
# Half-platformed letters.
s|U|ck|g
s|V|ct|g
s|X|cf|g
s|Y|cp|g
#
# "i" + main letter + "h" groups.
s|G|ikh|g
s|H|ith|g
s|M|ifh|g
s|N|iph|g
#
# Platformed letters.
s|K|ckh|g
s|T|cth|g
s|P|cph|g
s|F|cfh|g
#
# Two-letter groups.
s|C|ch|g
s|S|sh|g
s|E|ee|g