#! /bin/sed -f
#
# Splits each input line into elements, wrapping every element in "{...}".
# The line is scanned from its right end towards its left end by moving a
# one-character "scanning head" marker through the text:
#
#   "#"  head is ready to consume the next letter to its left
#   "%"  the letter just consumed may take a preceding string of "i"s
#   "="  the current element is complete; "{" must still be inserted
#
# Example: "daiin" becomes "{d}{a}{iin}"; "cheee" becomes "{ch}{eee}".
#
# NOTE(review): the characters "#", "%", "=" and the capitals used for
# folding below are not escaped, so input that already contains them is
# presumably mis-parsed -- confirm that the input alphabet excludes them.

# Map "sh", "ch", and "ee" to single letters to simplify the parsing.
# Note that "eee" groups are paired off from the left end (giving "Ee").
s/ch/C/g
s/sh/S/g
s/ee/E/g

# Map platformed and half-platformed letters to capitals to simplify
# the parsing.  The three-letter forms must be folded before the
# two-letter "c?" forms, which are their prefixes.
s/ckh/K/g
s/cth/T/g
s/cfh/F/g
s/cph/P/g
#
s/ikh/G/g
s/ith/H/g
s/ifh/M/g
s/iph/N/g
#
s/ck/U/g
s/ct/V/g
s/cf/X/g
s/cp/Y/g

# Put down the scanning head, in "#" state, at the end of the line.
s/$/#/

:x
# If in "#" state, copy the next main letter and its "e" complement,
# insert the "}" delimiter, and switch to state "%" if "i"s are allowed
# before this letter, or to state "=" otherwise.  Every rule below
# consumes the "#", so at most one of them fires per pass on clean input.
s/\([CSEktfpKTFPd]e\)#/=\1}/
s/\([CSEktfpKTFPGHMNUVXYgubx]\)#/=\1}/
# NOTE(review): "g" is also in the class above, so it is normally consumed
# there and never reaches the "%" state -- confirm which was intended.
s/\([rlgmjnsd]\)#/%\1}/
# The "q" is strictly word-initial, and an element by itself:
s/\([q]\)#/=\1}/
# An unattached [aoy] is an element by itself too:
s/\([aoy]\)#/=\1}/
# A literal "?" is an element by itself; any other unrecognized character
# also becomes its own element, flagged with a trailing "?":
s/\([?]\)#/=\1}/
s/\(.\)#/=\1?}/

# If in "%" state, attach the "i" string (longest first, at most three)
# to the group and go to "=" state; with no "i" just go to "=" state:
s/\(iii\)%/=\1/
s/\(ii\)%/=\1/
s/\(i\)%/=\1/
s/%/=/

# If in "=" state, insert the "{" delimiter and go back to "#" state:
s/=/#{/

# Loop while any substitution succeeded since the last test.
tx

# We should exit the loop only in the "#" state at beginning of line.
# Discard the scanning head now that the scan is done:
s/^#//

# Unfold the letter folding:
s/U/ck/g
s/V/ct/g
s/X/cf/g
s/Y/cp/g
#
s/G/ikh/g
s/H/ith/g
s/M/ifh/g
s/N/iph/g
#
s/K/ckh/g
s/T/cth/g
s/P/cph/g
s/F/cfh/g
#
s/C/ch/g
s/S/sh/g
s/E/ee/g