#! /bin/sed -f
# Last edited on 1998-07-27 00:46:31 by stolfi
#
# Usage: factor-OK < INFILE > OUTFILE
# Factors EVA strings according to the OKOKOKO paradigm.
#
# The input must contain one string per line, without comments or blanks.
# The output will have the form {w1}{w2}...{wn} where
# each "w" is a non-null element of the extended OKOKOKO paradigm.
#
# Unreadable characters in the input ("?" or "*") are parsed as "{*}".
# Failures of the OKOKOKO paradigm are finessed by parsing any
# offending character "c" as "{c?}".
#
# Reserved characters: "#" and "%" are used below as scanning-head
# markers, and the capitals C S E K T F P G H M N U V X Y are used as
# temporary single-letter codes that are expanded again at the end.
# Input lines must therefore not contain any of these characters.
#
# Layout note: every command must stay on its own line.  A "#" comment
# runs to the end of the physical line, so joining lines would turn the
# commands that follow a comment into comment text.
#
# ----------------------------------------------------------------------
# Step 1: letter folding.
#
# Map "ch", "sh", and "ee" to single letters to simplify the parsing.
# Note that "eee" groups are paired off from the left end ("eee" -> "Ee").
s/ch/C/g
s/sh/S/g
s/ee/E/g
#
# Map platformed letters to capitals to simplify the parsing:
s/ckh/K/g
s/cth/T/g
s/cfh/F/g
s/cph/P/g
#
# Same for the "i"-initial variants:
s/ikh/G/g
s/ith/H/g
s/ifh/M/g
s/iph/N/g
#
# Same for the half-platformed letters.  These run after the "c?h"
# rules above, so they only see a "c?" that is not followed by "h":
s/ck/U/g
s/ct/V/g
s/cf/X/g
s/cp/Y/g
#
# ----------------------------------------------------------------------
# Step 2: parsing loop.
#
# Put down the scanning head, in "#" state, at the end of the line.
# The head moves right to left: everything to its left is still
# unparsed, everything to its right is already wrapped in braces.
s/$/#/
:x
# Parse an "O" element: a maximal run of "a", "o", "y" just before the head.
s/\([aoy][aoy]*\)#/#{\1}/
tx
# Parse a "Q" element:
s/\([q]\)#/#{\1}/
tx
# Parse a "K" element.
# First form: a main letter followed by one "e" or "h" complement.
s/\([CSEktfpKTFPGHMNd][eh]\)#/#{\1}/
tx
# Second form: a main letter with no complement.
s/\([CSEktfpKTFPGHMNUVXYgubxv]\)#/#{\1}/
tx
# Third form: letters that may carry a prefix of "i"s.  Emit the letter
# and the closing "}" only, and switch the head to the "%" state so
# that the rules below can pull the "i"s into the same group.
# NOTE(review): "g" is also in the class of the previous rule, which
# always fires first, so "g" never reaches this rule -- confirm intent.
s/\([rlgmjnsd]\)#/%\1}/
# If in "%" state, attach up to three "i"s to the group (longest first),
# then emit the opening "{" and return to the "#" state:
s/\(iii\)%/#{\1/
s/\(ii\)%/#{\1/
s/\(i\)%/#{\1/
s/%/#{/
tx
# Treat garbage characters ("*" or "?") as elements, normalized to "{*}":
s/\([*?]\)#/#{*}/
tx
# Any unrecognized character is an element by itself too, flagged with "?":
s/\(.\)#/#{\1?}/
tx
#
# The loop exits only when no rule applies, i.e. when the head, in
# "#" state, has reached the beginning of the line.  Remove it.
s/^#//
#
# ----------------------------------------------------------------------
# Step 3: unfold the letter folding of step 1.
s/U/ck/g
s/V/ct/g
s/X/cf/g
s/Y/cp/g
#
s/G/ikh/g
s/H/ith/g
s/M/ifh/g
s/N/iph/g
#
s/K/ckh/g
s/T/cth/g
s/P/cph/g
s/F/cfh/g
#
s/C/ch/g
s/S/sh/g
s/E/ee/g