Over the weekend I have pored over the list of most common words. They can be grouped into the following major sets, based on prefixes: qoljc*/qoqjc* cccc*/csccc* oljc*/oqjc* cgc* ixcccc*/ixcsccc* cccljc*/csccljc*/cccqjc* plus the following short connectives: oix qoix ois csoix oixcg ixoix csciiiu csciiiiu ciiiiu To confirm some hunches about gallows, I tabulated all \lj/ and \qj/ gallows letters and their neighboring \c/ strings: cat bio-j-jsa-gut.wds \ | sed -e 's/cs/c/g' \ | enum-contexts -vPAT='c*qjc*' -vLCTX=0 -vRCTX=1 \ | wfreq \ > .foo cat bio-j-jsa-gut.wds \ | sed -e 's/cs/c/g' \ | enum-contexts -vPAT='c*ljc*' -vLCTX=0 -vRCTX=1 \ | wfreq \ > .bar pr -m -s' ' -t -i' '1 .foo .bar > .baz \qj/ gallows \lj/ gallows ----------------- ------------------- 129 0.18 qjccg 251 0.16 ljccg 79 0.11 qjcccg 244 0.16 ljcccg 44 0.06 qjcy 131 0.08 ljcccy 38 0.05 cccqjccy 100 0.06 ljcy 36 0.05 qjcccy 55 0.04 ljccy 31 0.04 qjo 54 0.03 cccljccy 29 0.04 ccccqjccy 44 0.03 ccccljccy 26 0.04 qjccccg 42 0.03 ljo 24 0.03 qjccy 26 0.02 ljccccg 14 0.02 qjccccy 25 0.02 ljccccy 10 0.01 cccqjcy 20 0.01 cccljcy 10 0.01 ccccqjcy 15 0.01 ccccljcy 8 0.01 qjco 12 0.01 cccljcccg 6 0.01 cqjcccy 11 0.01 ljco 5 0.01 qjcco 9 0.01 cccljcccy 4 0.01 qjccco 8 0.01 cccljccg 4 0.01 ccqjcy 7 0.00 cljccy 4 0.01 cccqjcccy 7 0.00 cljcccy 3 0.00 qjccci 7 0.00 cccljci 3 0.00 qj 5 0.00 lji 3 0.00 cqjco 5 0.00 ljcco 3 0.00 cqjccy 5 0.00 cljcccg 3 0.00 cqjccg 5 0.00 ccljci 3 0.00 cqjcccg 5 0.00 ccccljcccy 3 0.00 cccqjccg 5 0.00 ccccljcccg 3 0.00 cccqjcccg 4 0.00 ljcccccy 3 0.00 ccccqjcccg 4 0.00 ccclj 2 0.00 qjcg 3 0.00 lj 2 0.00 cqjcy 3 0.00 ccljcy 2 0.00 ccqjo 2 0.00 ljcci 2 0.00 ccqjci 2 0.00 ljccco 1 0.00 qji 2 0.00 ljcccccg 1 0.00 qjcccccy 2 0.00 cljci 1 0.00 qjccc 2 0.00 cljccg 1 0.00 qjcc 2 0.00 ccljccg 1 0.00 cqjci 2 0.00 ccccljcci 1 0.00 cqjcco 2 0.00 ccccljccg 1 0.00 cqjcci 1 0.00 ljcg 1 0.00 cqjccccg 1 0.00 ljccci 1 0.00 cqjccc 1 0.00 ljccccl 1 0.00 ccqjccg 1 0.00 ccljco 1 0.00 ccqjcccy 1 0.00 ccljcccy 1 
0.00 cccqjci 1 0.00 ccljcccg 1 0.00 ccccqjci 1 0.00 cccljco 1 0.00 ccccqjcccy 1 0.00 cccljcci 1 0.00 cccccqjccc 1 0.00 cccljccccy ----- ---- ---- 1 0.00 cccljccccg 700 1.00 TOT 1 0.00 cccljc 1 0.00 ccccljco 1 0.00 cccccljccy ----- ---- ---- 1566 1.00 TOT Now let's check the significance of the \s/ plume on \c/. First, let's list all initial \c/-strings that have plumes against all that don't have them: cat bio-j-jsa-gut.wds \ | sed \ -e 's/cs/z/g' \ -e 's/^/_/g' \ -e 's/$/_/g' \ | enum-contexts -vPAT='_c*z[zc]*[^zc]' -vLCTX=0 -vRCTX=0 \ | wfreq \ > .foo cat bio-j-jsa-gut.wds \ | sed \ -e 's/cs/z/g' \ -e 's/^/_/g' \ -e 's/$/_/g' \ | enum-contexts -vPAT='_c*c[^zc]' -vLCTX=0 -vRCTX=0 \ | wfreq \ > .bar pr -m -s' ' -t -i' '1 .foo .bar > .baz "z"=\cs/ prefixes "c" prefixes ------------------ ------------------ 219 0.30 _zcccg 364 0.32 _cg 71 0.10 _zcccy 192 0.17 _ccccg 69 0.09 _zci 95 0.08 _cy 63 0.08 _zo 67 0.06 _ccccy 54 0.07 _zccl 66 0.06 _ci 38 0.05 _zccccg 60 0.05 _cccl 34 0.05 _zcccl 37 0.03 _cccq 30 0.04 _zcco 31 0.03 _cccccy 26 0.04 _zccq 29 0.03 _ccccl 23 0.03 _zccccy 25 0.02 _ccco 22 0.03 _zcccq 23 0.02 _ccccq 19 0.03 _zco 22 0.02 _cccg 9 0.01 _zccy 20 0.02 _cccccg 8 0.01 _cccz_ 19 0.02 _cq 7 0.01 _zcci 17 0.01 _cco 6 0.01 _zccg 13 0.01 _ccci 6 0.01 _zccci 11 0.01 _cccci 6 0.01 _z_ 10 0.01 _cci 4 0.01 _zzcccg 9 0.01 _ccl 3 0.00 _zcq 8 0.01 _cl 3 0.00 _zcl 8 0.01 _ccq 3 0.00 _zccco 6 0.01 _cccy 3 0.00 _ccccz_ 1 0.00 _cccco 2 0.00 _zl 1 0.00 _ccccco 1 0.00 _zzcl 1 0.00 _ccccci 1 0.00 _zzcco ----- ---- ---- 1 0.00 _zzccl 1135 1.00 TOT 1 0.00 _zzcccy 1 0.00 _zzcccl 1 0.00 _zq 1 0.00 _zi 1 0.00 _zcy 1 0.00 _zccz_ 1 0.00 _zcccz_ 1 0.00 _zccccq 1 0.00 _zc_ 1 0.00 _ccczcy 1 0.00 _ccczci ----- ---- ---- 742 1.00 TOT Let's do it again with whole words: cat bio-j-jsa-gut.wds \ | sed -e 's/cs/z/g' \ | egrep '^z' \ | wfreq \ > .foo cat bio-j-jsa-gut.wds \ | sed -e 's/cs/z/g' \ | egrep '^c' \ | wfreq \ > .bar pr -m -s' ' -t -i' '1 .foo .bar > .baz "z"=\cs/ words "c" words 
------------------ ------------------ 204 0.28 zcccgcy 172 0.15 ccccgcy 69 0.09 zcccy 73 0.06 cgciiiiu 36 0.05 zccccgcy 67 0.06 ccccy 31 0.04 zciiiiu 51 0.04 cgciis 25 0.03 zoix 50 0.04 cgciix 24 0.03 zccljccy 33 0.03 cgcy 23 0.03 zccccy 31 0.03 cccccy 20 0.03 zcccljccy 29 0.03 cccljccy 17 0.02 zccoix 21 0.02 cccqjccy 14 0.02 zciix 20 0.02 ciiiiu 13 0.02 zccqjccy 19 0.02 cccccgcy 11 0.02 zcoix 18 0.02 ccccljccy 11 0.02 zciis 17 0.01 cgzcccgcy 11 0.02 zcccqjccy 17 0.01 cgoix 10 0.01 zois 17 0.01 cccoix 10 0.01 zccljcy 17 0.01 ccccqjccy 9 0.01 zccy 16 0.01 cccgcy 9 0.01 zccois 12 0.01 ciix 7 0.01 zcccqjcy 12 0.01 cgciiiu 6 0.01 zcccgciix 11 0.01 ccoix 6 0.01 z 9 0.01 cyljcccgcy 5 0.01 zccljcccy 9 0.01 cgccccgcy 5 0.01 zccljcccgcy 9 0.01 ccccljcy 5 0.01 zcciis 8 0.01 cyzcccgcy 5 0.01 zccgcy 8 0.01 cccz 5 0.01 zcccljcy 8 0.01 cccljcy 4 0.01 zzcccgcy 7 0.01 cyqjccgcy 4 0.01 zccqjcy 7 0.01 ciis 3 0.00 zoixljcccy 6 0.01 cyljcccy 3 0.00 zoixljcccgcy 6 0.01 cgciixcy 3 0.00 zoiiiu 6 0.01 cccois 3 0.00 zcoixcgcy 5 0.00 cyljccgcy 3 0.00 zciiis 5 0.00 ciiiu 3 0.00 zccqjcccy 5 0.00 cgciij 3 0.00 zcclj 5 0.00 cccy 3 0.00 zcccoix 5 0.00 cccljccgcy 3 0.00 zcccljcccgcy 5 0.00 cccljcccgcy 3 0.00 zccciix 5 0.00 ccccgciiiiu 3 0.00 zccciis 5 0.00 ccccg 3 0.00 zcccgciiiiu 4 0.00 cyzcccy 2 0.00 zoljcccgcy 4 0.00 cyccccgcy 2 0.00 zoixccccy 4 0.00 cgciiscy 2 0.00 zoixccccgcy 4 0.00 cgcccgcy 2 0.00 zcljcy 4 0.00 ccix 2 0.00 zcix 4 0.00 cccqjcy 2 0.00 zccqgcccy 4 0.00 cccljcccy 2 0.00 zccljccgcy 4 0.00 ccciix 2 0.00 zcccqgccy 4 0.00 ccciis 2 0.00 zcccljcciix 4 0.00 cccciix 2 0.00 zcccljccgcy 4 0.00 cccciis 2 0.00 zcccljcccy 3 0.00 cyzccccy 2 0.00 zcccg 3 0.00 cyqjcccy 1 0.00 zzcljcccgcy 3 0.00 cyqjcccgcy 1 0.00 zzccoix 3 0.00 cqjcccy 1 0.00 zzccljcy 3 0.00 cqjcccgcy 1 0.00 zzcccy 3 0.00 cqgcccy 1 0.00 zzcccljccy 3 0.00 ciixcy 1 0.00 zqgcccy 3 0.00 ciij 1 0.00 zoljoix 3 0.00 cgois 1 0.00 zoixzcccy 3 0.00 cgix 1 0.00 zoixqjcccgcy 3 0.00 cgciixcgcy 1 0.00 zoixois 3 0.00 cgcccoix 1 0.00 zoixljcy 
3 0.00 cccqjccgcy 1 0.00 zoixljccy 3 0.00 cccgciis 1 0.00 zoixljccgcy 3 0.00 ccccz 1 0.00 zoixcy 3 0.00 ccccqjcy 1 0.00 zoixcgcy 2 0.00 cyqjciiiiu 1 0.00 zoixccljciix 2 0.00 cyljciiiiu 1 0.00 zoixcccoix 2 0.00 cljccgcy 1 0.00 zocljcccy 2 0.00 ciisoix 1 0.00 zocgciis 2 0.00 ciiiiucy 1 0.00 zljciis 2 0.00 cgzcccy 1 0.00 zljcccy 2 0.00 cgzccccy 1 0.00 zixz 2 0.00 cgljccgcy 1 0.00 zcy 2 0.00 cgcyljcccgcy 1 0.00 zcqjoix 2 0.00 cgciixzccgcy 1 0.00 zcqjcy 2 0.00 cgciixo 1 0.00 zcqjcccy 2 0.00 cgciiis 1 0.00 zcoljzccgcy 2 0.00 cgci 1 0.00 zcoljcy 2 0.00 cgccoix 1 0.00 zcoljciiiiu 2 0.00 cgccgcy 1 0.00 zcocqgcccgcy 2 0.00 cgcccy 1 0.00 zcocljccy 2 0.00 ccqjcy 1 0.00 zcljcoix 2 0.00 ccljccgcy 1 0.00 zcixcgcy 2 0.00 cccqjcccgcy 1 0.00 zcis 2 0.00 cccqgcccy 1 0.00 zciljciiiiu 2 0.00 cccljciis 1 0.00 zciixljcccy 2 0.00 ccciij 1 0.00 zciixcy 2 0.00 cccciixcy 1 0.00 zciixccccg 2 0.00 ccccgoix 1 0.00 zciisciix 2 0.00 ccccgciix 1 0.00 zciiiuo 2 0.00 ccccgciis 1 0.00 zccz 1 0.00 cyzccciixcgcy 1 0.00 zccqjciis 1 0.00 cyzccccgcy 1 0.00 zccqjcccyix 1 0.00 cyzcccccy 1 0.00 zccqjcccgcy 1 0.00 cyqjcy 1 0.00 zccqgccccgcy 1 0.00 cyqjciix 1 0.00 zccoljcy 1 0.00 cyqjciis 1 0.00 zccoixoix 1 0.00 cyqjciiiu 1 0.00 zccoixo 1 0.00 cyqjccy 1 0.00 zccoixcgcy 1 0.00 cyqjcccgciis 1 0.00 zccljciix 1 0.00 cyoljcy 1 0.00 zccljciij 1 0.00 cyljzccoix 1 0.00 zccljciiiiu 1 0.00 cyljzcccgcy 1 0.00 zccljciiiiiu 1 0.00 cyljciix 1 0.00 zccljccccy 1 0.00 cyljciis 1 0.00 zcciix 1 0.00 cyljciiiu 1 0.00 zcciij 1 0.00 cyljcccgciis 1 0.00 zccgciix 1 0.00 cyljcccccy 1 0.00 zcccz 1 0.00 cylgccccy 1 0.00 zcccyljcy 1 0.00 cylgccccgcy 1 0.00 zcccyis 1 0.00 cyixois 1 0.00 zcccqjcccgcy 1 0.00 cyixcccgcy 1 0.00 zcccqjcccgcccy 1 0.00 cyiscy 1 0.00 zcccgoix 1 0.00 cycqgciiiiu 1 0.00 zcccgciixcgcy 1 0.00 cycljcccy 1 0.00 zcccgciis 1 0.00 cyciis 1 0.00 zcccgciij 1 0.00 cycgcy 1 0.00 zccccqjcccy 1 0.00 cycgciiszcccy 1 0.00 zccccgciiis 1 0.00 cycgciisciix 1 0.00 zccccg 1 0.00 cycgciiiiu 1 0.00 zc 1 0.00 cycccoix ----- ---- ---- 1 
0.00 cyccccz 729 1.00 TOT 1 0.00 cyccccy 1 0.00 cyccccqjccy 1 0.00 cyccccljcccy 1 0.00 cyccccgciis 1 0.00 cyccccg 1 0.00 cycccccy 1 0.00 cycccccgcy 1 0.00 cy 1 0.00 cqjcy 1 0.00 cqjcoix 1 0.00 cqjcois 1 0.00 cqjcciix 1 0.00 cqjccgcy 1 0.00 cqgcoix 1 0.00 cqgciixoiis 1 0.00 cqgcciix 1 0.00 cqgcciis 1 0.00 cqgccgois 1 0.00 cljciix 1 0.00 cljccy 1 0.00 cljcccy 1 0.00 cljcccgcy 1 0.00 clgccccy 1 0.00 clgccccgcy 1 0.00 cizcy 1 0.00 ciqjccy 1 0.00 ciixzcccz 1 0.00 ciixoix 1 0.00 ciixoiscy 1 0.00 ciixciixcgcy 1 0.00 ciixcccgcy 1 0.00 ciixccccgcy 1 0.00 ciisois 1 0.00 ciiscy 1 0.00 ciisciis 1 0.00 ciiis 1 0.00 cgzcoixcycg 1 0.00 cgzcoix 1 0.00 cgzcois 1 0.00 cgzccoix 1 0.00 cgzccgcy 1 0.00 cgzcccz 1 0.00 cgzccccgcy 1 0.00 cgzccccgciix 1 0.00 cgoqjcccy 1 0.00 cgoljccgcy 1 0.00 cgoixljccgcy 1 0.00 cgoixlgccccgcy 1 0.00 cgoixccccgcy 1 0.00 cgoixcccccgcy 1 0.00 cgoisciiiiu 1 0.00 cgljzcccy 1 0.00 cgljcccy 1 0.00 cgisoix 1 0.00 cgcyqjccy 1 0.00 cgcyqjccgcy 1 0.00 cgcyljzccy 1 0.00 cgcyljcy 1 0.00 cgcyljciiiiu 1 0.00 cgcyljccgcy 1 0.00 cgcyij 1 0.00 cgciixzcccgcy 1 0.00 cgciixoix 1 0.00 cgciixljcy 1 0.00 cgciixcyiscg 1 0.00 cgciixciix 1 0.00 cgciixciiscy 1 0.00 cgciixciis 1 0.00 cgciixciiiiu 1 0.00 cgciixccccgcy 1 0.00 cgciisoiscy 1 0.00 cgciisoij 1 0.00 cgciiscgcy 1 0.00 cgciisccccy 1 0.00 cgciiscccccgciix 1 0.00 cgciiix 1 0.00 cgciiiscycgcy 1 0.00 cgciiiiiu 1 0.00 cgcicljccy 1 0.00 cgccois 1 0.00 cgcccljcccgcy 1 0.00 cgccccy 1 0.00 cgccccoix 1 0.00 cgcccccy 1 0.00 cgcccccgcy 1 0.00 ccqjoix 1 0.00 ccqjciix 1 0.00 ccqjciiis 1 0.00 ccqjccgcy 1 0.00 ccqgzccccgcy 1 0.00 ccqgccccgcy 1 0.00 ccoqjcy 1 0.00 ccoixo 1 0.00 ccoixljccccy 1 0.00 ccoixcy 1 0.00 ccoixccccy 1 0.00 ccocgciiiiu 1 0.00 ccljcy 1 0.00 ccljciix 1 0.00 ccljciisoix 1 0.00 ccljciis 1 0.00 ccljciiiiu 1 0.00 ccljcccy 1 0.00 cclgciiiiu 1 0.00 ccixz 1 0.00 ccixis 1 0.00 ccixciiiiiu 1 0.00 ccixcgciiiiu 1 0.00 ccixccqgzccccy 1 0.00 ccis 1 0.00 ccczcy 1 0.00 ccczciix 1 0.00 cccyqjcccy 1 0.00 cccqgoix 1 0.00 cccqgcy 1 0.00 
cccqgcccgcy 1 0.00 cccqgccccgcy 1 0.00 cccqg 1 0.00 cccoixcccgcy 1 0.00 cccoixccccy 1 0.00 cccljcois 1 0.00 cccljciij 1 0.00 cccljcciix 1 0.00 cccljccg 1 0.00 cccljccccg 1 0.00 cccljc 1 0.00 ccclj 1 0.00 ccciixoixcy 1 0.00 ccciisois 1 0.00 ccciiscy 1 0.00 cccgoix 1 0.00 cccgciij 1 0.00 cccgciiiu 1 0.00 ccccqjcccy 1 0.00 ccccqjcccgcy 1 0.00 ccccqgcccgcy 1 0.00 ccccois 1 0.00 ccccljco 1 0.00 ccccljcccy 1 0.00 cccciixois 1 0.00 ccccgois 1 0.00 ccccgcyljciis 1 0.00 ccccgcyix 1 0.00 ccccgcccy 1 0.00 cccccoix 1 0.00 ccccciiiiu 1 0.00 cccccgciis ----- ---- ---- 1148 1.00 TOT Some conclusions: * The gallows characters \qj/ and \lj/ appear to be closely related: for every common word with \lj/, there appears to be a word with \qj/ that occurs with about 1/4 the frequency. * The same phenomenon can be noted with respect to prefixes containing \cc/ and \csc/: for every word beginning with \cc/, there is a word where the first \cc/ is replaced by \csc/, with practically the same frequency. * There appears to be much confusion between the suffixes \iu/ and \iiiu/. * There appears to be much confusion between \o/ and \ci/. Recall also our previous guess that \cy/ is just the final form of \ci/. Therefore, I have decided to do the following simplifications before recomputing the consensus file: * Ignore, for the time being, the difference between \qj/ and \lj/, and between \qg/ and \lg/, replacing them by \h/ and \p/, respectively; * omit the \s/ plume after \c/; * replace all strings \iiu/, \iiiu/, \iiiiu/, etc. by \m/; * replace \cy/ by \ci/; * replace \o/ by \ci/. Needless to say, I don't mean that these differences are meaningless; it is just that there seems to be structure to be discovered that does not depend on these features. 
cat bio-m-jsa.evt \ | jsa2hec \ | make-consensus-interlin \ > bio-x-hec.evt cat bio-x-hec.evt \ | egrep '^<.*;J> ' \ | sed \ -e 's/{[^}]*}//g' \ > bio-j-hec.evt extract-words-from-interlin \ -chars "mrcgiAeHP" \ bio-j-hec.evt \ bio-j-hec jsa2hec ------------------------------------------------- #! /n/gnu/bin/sed -f # Recoding superanalytic to ad-hoc encoding: /^[^#]/s/ij/f/g /^[^#]/s/ix/e/g /^[^#]/s/cy/X/g /^[^#]/s/ci/X/g /^[^#]/s/iiiiu/m/g /^[^#]/s/iiiu/m/g /^[^#]/s/iiu/m/g /^[^#]/s/iis/v/g /^[^#]/s/is/r/g /^[^#]/s/X/ci/g /^[^#]/s/o/ci/g /^[^#]/s/cs/c/g /^[^#]/s/qci/A/g /^[^#]/s/qj/H/g /^[^#]/s/qg/P/g /^[^#]/s/lj/H/g /^[^#]/s/lg/P/g ------------------------------------------------- lines words bytes file ------ ------- --------- ------------ 7069 7069 51059 bio-j-hec.wds 1495 1495 14517 bio-j-hec.dic 5081 5081 37045 bio-j-hec-gut.wds 627 627 5347 bio-j-hec-gut.dic 929 929 3085 bio-j-hec-fun.wds 63 63 473 bio-j-hec-fun.dic 1059 1059 10929 bio-j-hec-bad.wds 805 805 8697 bio-j-hec-bad.dic Digraph counts: m r c g i A e H P TOT ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- . . 84 3090 . . 1376 288 173 70 5081 m 738 . . 5 . . . . . . 743 r 449 . . 154 . . . . 2 . 605 c 47 . . 7573 2197 6164 . 16 382 33 16412 g 50 . 1 2138 . . . 4 4 . 2197 i 2889 742 509 97 . 2 8 1274 600 45 6166 A 8 1 9 32 . . 1 137 1174 24 1386 e 888 . 2 614 . . 1 3 209 11 1728 H 10 . . 2528 . . . 6 . . 2544 P 2 . . 181 . . . . . . 183 ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- TOT 5081 743 605 16412 2197 6166 1386 1728 2544 183 37045 Next-symbol probability (× 99): m r c g i A e H P TOT ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- . . 2 60 . . 27 6 3 1 99 m 98 . . 1 . . . . . . 99 r 73 . . 25 . . . . . . 99 c . . . 46 13 37 . . 2 . 99 g 2 . . 96 . . . . . . 99 i 46 12 8 2 . . . 20 10 1 99 A 1 . 1 2 . . . 10 84 2 99 e 51 . . 35 . . . . 12 1 99 H . . . 98 . . . . . . 99 P 1 . . 98 . . . . . . 
99 ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- TOT 14 2 2 44 6 16 4 5 7 0 37045 Previous-symbol probability (× 99): m r c g i A e H P TOT ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- . . 14 19 . . 98 17 7 38 14 m 14 . . . . . . . . . 2 r 9 . . 1 . . . . . . 2 c 1 . . 46 99 99 . 1 15 18 44 g 1 . . 13 . . . . . . 6 i 56 99 83 1 . . 1 73 23 24 16 A . . 1 . . . . 8 46 13 4 e 17 . . 4 . . . . 8 6 5 H . . . 15 . . . . . . 7 P . . . 1 . . . . . . 0 ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- TOT 99 99 99 99 99 99 99 99 99 99 37045 cat bio-j-hec.wds \ | /n/gnu/bin/awk -f foo.awk \ | sort | uniq -c | sort -nr | expand \ > bio-j-hec-wpairs.freq It seems that the gallows letters transected by \c..c/ are distinct letters. To check that, let's look at the distribution of \c/ strings on their own and around the \lj/ and \qj/ gallows, ignoring the \s/ plumes on \c/: cat bio-j-jsa.wds \ | sed -e 's/cs/c/' \ | enum-contexts -vPAT='[clqj]*c[clqj]*' -vLCTX=0 -vRCTX=1 \ | wfreq4 1848 cy 14 qjccccy 3 qcccg 1 qcqjccccg 672 ccccg 14 ccccljcy 3 cs 1 qci 469 ci 12 cccqg 3 cqjco 1 qccy 453 cg 11 ljco 3 cqjcccg 1 qcci 425 ljci 11 cqg 3 ccqg 1 qcccy 251 ljccg 11 cccljcccg 3 cccqjccg 1 qccccg 243 ljcccg 10 cccqjcy 3 cccqjcccg 1 ljcs 227 ccccy 10 ccccqjcy 3 ccccqjcccg 1 ljcg 149 qjci 9 ccs 3 ccccqg 1 ljccci 131 ljcccy 9 cccljcccy 3 cc 1 ljccccljccy 129 qjccg 9 cccc 2 qjcg 1 cqjcy 100 ljcy 8 qjco 2 qcqjccg 1 cqjcco 87 cci 8 cccljccg 2 qcljcccg 1 cqjcci 86 cccg 7 cljccy 2 ljcci 1 cqjccg 80 cccccg 7 cljcccy 2 ljccco 1 cqjccc 79 qjcccg 7 cccljci 2 ljcccccg 1 ccqjccg 75 cccccy 6 cccco 2 cqjccy 1 ccqjcccy 74 ccco 5 qjcco 2 cljci 1 ccljco 64 co 5 ljcco 2 cljccg 1 ccljcccy 55 cccljccy 5 cqjcccy 2 clg 1 ccljcccg 54 ljccy 5 ccy 2 ccqjo 1 cclg 52 cco 5 ccljci 2 ccqjci 1 cccs 52 cccy 5 ccg 2 ccljccg 1 cccqjci 44 qjcy 5 ccccljcccy 2 ccccljcci 1 cccljco 44 ccccljccy 5 ccccljcccg 2 ccccljccg 1 cccljcci 38 cccqjccy 5 ccccc 2 ccccci 1 cccljccccy 
36 qjcccy 4 qjccco 2 ccc 1 cccljccccg 29 ccccqjccy 4 ljcccccy 1 qjcccccy 1 cccljc 26 qjccccg 4 cljcccg 1 qjccc 1 ccccs 26 ljccccg 4 ccqjcy 1 qjcc 1 ccccqjci 25 ljccccy 4 ccljcy 1 qcy 1 ccccqjcccy 24 qjccy 4 cccqjcccy 1 qcqjcy 1 ccccljco 24 cccci 4 ccclj 1 qcqjci 1 cccccqjcccy 23 ccci 4 cccccs 1 qcqjccy 1 ccccco 20 cccljcy 3 qjccci 1 qcqjcccy 1 ccccccy 16 c Separating into categories: No gallows \lj/ gallows \qj/ gallows 1848 cy 425 ljci 149 qjci 469 ci 251 ljccg 129 qjccg 453 cg 243 ljcccg 79 qjcccg 672 ccccg 131 ljcccy 44 qjcy 227 ccccy 100 ljcy 36 qjcccy 87 cci 55 cccljccy 38 cccqjccy 86 cccg 54 ljccy 29 ccccqjccy 80 cccccg 44 ccccljccy 26 qjccccg 75 cccccy 26 ljccccg 24 qjccy 74 ccco 25 ljccccy 14 qjccccy 64 co 20 cccljcy 12 cccqg 52 cco 14 ccccljcy 11 cqg 52 cccy 11 ljco 10 cccqjcy 24 cccci 11 cccljcccg 10 ccccqjcy 23 ccci 8 qjco 16 c