Ok, let's go back to the search for "syllables" in the Bio section. Here is the classification we had before: cat bio-m-evt.evt \ | grep ';C>' \ | sed \ -e 's/{[^}]*}//g' \ -e 's/[\!%]//g' \ > .tmp-c-fsg.evt cat bio-m-evt.evt \ | grep ';C>' \ | sed \ -e 's/{[^}]*}//g' \ -e 's/[\!%]//g' \ > .tmp-c-fsg.evt extract-words-from-interlin \ -chars 'COG8EDA4TSHRNM2ZPIKLF' \ .tmp-c-fsg.evt \ .tmp-c-fsg cat .tmp-c-fsg.txt \ | /n/gnu/bin/sed \ -e 's/$/ /g' \ -e 's/CI/A/g' \ -e 's/IL/N/g' \ -e 's/IN/M/g' \ -e 's/I*\([MNRK]\)/\1/g' \ -e 's/CM/AN/g' \ -e 's/OM/AM/g' \ -e 's/ON/AN/g' \ -e 's/DM/DAN/g' \ -e 's/HM/HAN/g' \ -e 's/4O/Q/g' \ -e 's/A /G /g' \ | /n/gnu/bin/tr ' ' '\012' \ | egrep -v '[*6LKI4/=]' \ | /n/gnu/bin/tr '\012' ' ' \ | fold -w 90 -s \ | /n/gnu/bin/sed \ -e 's/^ *//g' -e 's/ *$//g' -e 's/ */ /g' \ > .voyn.fsg cat .voyn.fsg \ | tr -d ' \012' \ | fold -w 90 \ > .voyn.fss cat .voyn.fss \ | tr -d ' \012' \ | /n/gnu/bin/sed \ -e 's/D/H/g' \ -e 's/F/P/g' \ -e 's/S/T/g' \ -e 's/N/M/g' \ -e 's/PZ/X/g' \ -e 's/HZ/X/g' \ \ -e 's/AM/m/g' \ -e 's/AR/r/g' \ -e 's/AE/e/g' \ -e 's/A/O/g' \ -e 's/8G/g/g' \ -e 's/OR/x/g' \ -e 's/OEH/f/g' \ -e 's/QEH/v/g' \ -e 's/OE/u/g' \ -e 's/QE/w/g' \ -e 's/OH/b/g' \ -e 's/QH/h/g' \ -e 's/OP/p/g' \ -e 's/QP/q/g' \ -e 's/EH/j/g' \ -e 's/TCC/a/g' \ -e 's/XCC/y/g' \ -e 's/TC/t/g' \ -e 's/XC/d/g' \ -e 's/CC/c/g' \ -e 's/R/2/g' \ \ -e 's/[mre]/m/g' \ -e 's/[Gg]/g/g' \ -e 's/[Ppq]/p/g' \ -e 's/[Xdy]/y/g' \ -e 's/[Cc]/c/g' \ -e 's/[xu]/u/g' \ -e 's/[28]/i/g' \ -e 's/[Tta]/t/g' \ -e 's/[Hhbfvj]/h/g' \ \ -e 's/hcg/@/g' \ -e 's/htg/\$/g' \ -e 's/tyg/%/g' \ -e 's/ptg/\#/g' \ -e 's/tg/3/g' \ -e 's/cg/5/g' \ -e 's/im/7/g' \ -e 's/hm/9/g' \ -e 's/hg/1/g' \ -e 's/hu/./g' \ | count-digraph-freqs \ -v showentropy=1 \ -v chars=' muwphQEigOytc@%#$13579.' 
With a little more fiddling, we get cat .voyn.fss \ | tr -d ' \012' \ | fss2syl \ | fold -w 80 \ > .tmp cat .tmp \ | tr -d ' \012' \ | count-digraph-freqs \ -v showentropy=1 \ -v chars=' mu:wphQE8igOytC=+-' OK, let's do it differently: first we try to group the letters, then we try to map them to characters. First, let's try to be a bit softer on the correction, accepting "K" and loose "4": Perhaps should correct also "4D" to "4OD", "4C" to "4OC". cat .tmp-c-fsg.txt \ | /n/gnu/bin/sed \ -e 's/$/ /g' \ -e 's/IL/N/g' \ -e 's/CI/A/g' \ -e 's/IN/M/g' \ -e 's/I*\([MNRKE]\)/\1/g' \ -e 's/CN/AN/g' \ -e 's/CM/AN/g' \ -e 's/OM/AM/g' \ -e 's/ON/AN/g' \ -e 's/OK/AK/g' \ -e 's/DM/DAN/g' \ -e 's/HM/HAN/g' \ -e 's/4AE/4OE/g' \ -e 's/A /G /g' \ | /n/gnu/bin/tr ' ' '\012' \ | egrep -v '[*6LI/=]' \ | /n/gnu/bin/tr '\012' ' ' \ | fold -w 90 -s \ | /n/gnu/bin/sed \ -e 's/^ *//g' -e 's/ *$//g' -e 's/ */ /g' \ > .voyn.fsg cat .voyn.fsg \ | tr -d ' \012' \ | fold -w 90 \ > .voyn.fss cat .voyn.fss \ | tr -d ' \012' \ | fss2mor \ > .voyn.mor cat .voyn.mor \ | sort | uniq -c | expand \ | sort +0.0 -0.7nr \ > .voyn-m.frq cat .voyn.mor \ | sed -e 's/)(/)@(/g' \ | tr '@' '\n' \ | classify-glyphs \ | sort | uniq -c | expand \ | sort +0.8 -0.99 \ > .voyn-g.frq --- fss2mor ------------------------ #! 
/n/gnu/bin/sed -f # Splits corrected FSG into "morphemes" s/AK/(ak)/g s/AM/(am)/g s/AN/(an)/g s/AR/(ar)/g s/AE/(ae)/g s/A/O/g # s/8G/(bg)/g s/OG/(og)/g s/G/(g)/g # s/PZ/(pz)/g s/FZ/(fz)/g s/HZ/(hz)/g s/DZ/(dz)/g # s/z)CCC/zccc)/g s/z)CC/zcc)/g s/z)C/zc)/g # s/H/(h)/g s/D/(d)/g # s/h)CCC/hccc)/g s/h)CC/hcc)/g s/h)C/hc)/g s/d)CCC/dccc)/g s/d)CC/dcc)/g s/d)C/dc)/g # s/P/(p)/g s/F/(f)/g # s/p)CCC/pccc)/g s/p)CC/pcc)/g s/p)C/pc)/g s/f)CCC/fccc)/g s/f)CC/fcc)/g s/f)C/fc)/g # s/4OE/(qoe)/g s/OE/(oe)/g s/4OR/(qor)/g s/OR/(or)/g # s/4OCCC/(qoccc)/g s/4OCC/(qocc)/g s/4OC/(qoc)/g s/4O/(qo)/g s/OCCC/(occc)/g s/OCC/(occ)/g s/OC/(oc)/g s/O/(o)/g s/4CCC/(qccc)/g s/4CC/(qcc)/g s/4C/(qc)/g s/4/(q)/g # s/SCCC/(sccc)/g s/SCC/(scc)/g s/SC/(sc)/g s/S/(s)/g s/TCCC/(tccc)/g s/TCC/(tcc)/g s/TC/(tc)/g s/T/(t)/g s/CCC/(ccc)/g s/CC/(cc)/g # s/R/(r)/g s/2/(z)/g s/8/(b)/g s/E/(e)/g s/C/(c)/g # s/(\([q]*[o]*[c]*\))(\([dhpf]\)/(\1\2/g # s/\(g)\)/\1\ /g s/\(a[mnrek])\)/\1\ /g s/\(o[rek])\)/\1\ /g ------------------------------------ --- classify-glyphs ------------------------ #! /n/gnu/bin/sed -f # Classifies the glyphs as parsed by fss2mor # Assumes one glyph per line in the format (xxx...) # Prepends a one-letter class code and a space # Class codes: # a = (am) (ar) (an) (ak) (ae) # g = (bg) (og) (g) # D = (...dz...); same for H,P,F # d = (...d...) except (...dz...); same for h p f # x = ([q]*[o]*[c]*) # o = (oe) (or) (ok) (qoe) (qor) (qok) # s = (s...) # t = (t...) # c = (c...) # r = (r...) # z = (z...), i.e. FSG "2" # b = (b...) except (bg), i.e. FSG "8" # e = (e...) # ? = other /^([a-z0-9]*)$/!s/^/\? /g s/^(a[mnrek])/a &/g s/^([bo]*g)/g &/g s/^(.*dz.*)/D &/g s/^(.*hz.*)/H &/g s/^(.*pz.*)/P &/g s/^(.*fz.*)/F &/g s/^(.*d.*)/d &/g s/^(.*h.*)/h &/g s/^(.*p.*)/p &/g s/^(.*f.*)/f &/g s/^([q]*[o]*[c]*)/x &/g s/^([q]*o[erk])/o &/g s/^(s.*)/s &/g s/^(t.*)/t &/g s/^(r.*)/r &/g s/^(c.*)/c &/g s/^(z.*)/z &/g s/^(b.*)/b &/g s/^(e.*)/e &/g s/^(.*)/\? 
&/g -------------------------------------------- Here are the classified glyphs, manually rearranged with current best guesses: 438 e (e) = valid ------------------------ 671 b (b) = valid ------------------------ 177 r (r) = valid ------------------------ 368 z (z) = valid ("2") ------------------------ 2051 g (bg) = valid ("8G") 1696 g (g) = valid 20 g (og) = misreading of "8G"? ------------------------ 1151 o (oe) = valid 287 o (or) = valid 194 o (qoe) = valid 16 o (qor) = valid? ------------------------ 137 D (dz) = valid 44 D (dzc) = valid 1 D (dzcc) = misreading of "DZC"? 1 D (cdz) = misreading of "ODZ"? 5 D (odz) = valid? 2 D (odzc) = valid? 0 D (qdz) = negligible 1 D (qdzc) = negligible 0 D (qdzcc) = negligible 2 D (qodz) = valid? 5 D (qodzc) = valid? ------------------------ 84 H (hz) = valid 23 H (hzc) = valid 3 H (ohz) = valid? 4 H (ohzc) = valid? 3 H (qhz) = negligible 1 H (qhzc) = negligible 1 H (qhzcc) = negligible 0 H (qohz) = valid? 2 H (qohzc) = valid? ------------------------ 11 P (pz) = valid 7 P (pzc) = valid 1 P (cpzc) = negligible 1 P (opz) = negligible 1 P (opzc) = negligible ------------------------ 2 F (fz) = negligible 1 F (fzc) = negligible 1 F (fzcc) = negligible ------------------------ 556 a (ae) = valid 50 a (ak) = valid? 436 a (am) = valid 489 a (an) = valid 428 a (ar) = valid ------------------------ 316 d (d) = valid 99 d (dc) = valid 125 d (dcc) = valid 7 d (dccc) = misreading of "DCC"? 1 d (cd) = misreading of "OD"? 1 d (cdc) = misreading of "ODC"? 1 d (ocdcc) = misreading of "ODZC"? 210 d (od) = valid 74 d (odc) = valid 64 d (odcc) = valid 2 d (odccc) = misreading of "ODCC"? 5 d (qcd) = misreading of "4OD"? 1 d (qcdc) = misreading of "4ODC"? 1 d (qcdcc) = misreading of "4ODCC"? 4 d (qd) = misreading of "4OD"? 1 d (qdccc) = misreading of "4ODCC"? 
1 d (qoccdcc) = negligible 575 d (qod) = valid 234 d (qodc) = valid 249 d (qodcc) = valid 5 d (qodccc) = negligible ------------------------ 182 h (h) = valid 64 h (hc) = valid 31 h (hcc) = valid 1 h (ch) = misreading of "OH"? 168 h (oh) = valid 81 h (ohc) = valid 45 h (ohcc) = valid 1 h (ohccc) = misreading of "OHCC"? 2 h (qch) = misreading of "4OH"? 1 h (qchc) = misreading of "4OHC"? 1 h (qh) = misreading of "4OH"? 149 h (qoh) = valid 62 h (qohc) = valid 53 h (qohcc) = valid 1 h (qohccc) = misreading of "4OHCC"? ------------------------ 15 x (c) = to check 20 x (cc) = to check 7 x (ccc) = to check 86 x (o) = to check 2 x (oc) = to check 5 x (occ) = to check 1 x (occc) = to check 11 x (q) = to check 2 x (qc) = to check 5 x (qcc) = to check 2 x (qccc) = to check 29 x (qo) = to check 7 x (qocc) = to check 2 x (qoccc) = to check ------------------------ 116 p (p) = valid 1 p (pc) = negligible 2 p (pcc) = negligible 3 p (cp) = misreading of "OP"? 44 p (op) = valid 1 p (opc) = negligible 2 p (qp) = misreading of "4OP"? 23 p (qop) = valid ------------------------ 15 f (f) = valid 1 f (fc) = negligible 1 f (cf) = misreading of "OF"? 7 f (of) = valid 2 f (qf) = misreading of "4OF"? 5 f (qof) = valid ------------------------ 210 s (s) = valid 712 s (sc) = valid 149 s (scc) = valid ------------------------ 397 t (t) = valid 920 t (tc) = valid 124 t (tcc) = valid 2 t (tccc) = misreading of "TCC"? ------------------------ 2 ? K(z) = misreading of "82"? 1 ? (b)K(or) = misreading of "88OR"? 1 ? (e)K(e) = misreading of "E8E"? 1 ? (oh)K(oe) = misreading of "OH8OE"? 
Here are the valid ones in order of decreasing frequency: 2051 g (bg) = valid ("8G") 1696 g (g) = valid 1151 o (oe) = valid 920 t (tc) = valid 712 s (sc) = valid 671 b (b) = valid 575 d (qod) = valid 556 a (ae) = valid 489 a (an) = valid 438 e (e) = valid 436 a (am) = valid 428 a (ar) = valid 397 t (t) = valid 368 z (z) = valid ("2") 316 d (d) = valid 287 o (or) = valid 249 d (qodcc) = valid 234 d (qodc) = valid 210 d (od) = valid 210 s (s) = valid 194 o (qoe) = valid 182 h (h) = valid 177 r (r) = valid 168 h (oh) = valid 149 h (qoh) = valid 149 s (scc) = valid 137 D (dz) = valid 125 d (dcc) = valid 124 t (tcc) = valid 116 p (p) = valid 99 d (dc) = valid 84 H (hz) = valid 81 h (ohc) = valid 74 d (odc) = valid 64 d (odcc) = valid 64 h (hc) = valid 62 h (qohc) = valid 53 h (qohcc) = valid 50 a (ak) = valid? 45 h (ohcc) = valid 44 D (dzc) = valid 44 p (op) = valid 31 h (hcc) = valid 23 H (hzc) = valid 23 p (qop) = valid 16 o (qor) = valid? 15 f (f) = valid 11 P (pz) = valid 7 P (pzc) = valid 7 f (of) = valid 5 D (odz) = valid? 5 D (qodzc) = valid? 5 f (qof) = valid 4 H (ohzc) = valid? 3 H (ohz) = valid? 2 D (odzc) = valid? 2 D (qodz) = valid? 2 H (qohzc) = valid? 0 H (qohz) = valid? Let's redo this analysis collapsing "D" with "H" and "F" with "P". That will give about 25 valid letters... cat .voyn.fss \ | tr -d ' \012' \ | fss2mor \ | /n/gnu/bin/sed \ -e 's/d/h/g' \ -e 's/f/p/g' \ > .voyh.mor cat .voyh.mor \ | sort | uniq -c | expand \ | sort +0.0 -0.7nr \ > .voyh-m.frq cat .voyh.mor \ | sed -e 's/)(/)@(/g' \ | tr '@' '\n' \ | classify-glyphs \ | sort | uniq -c | expand \ | sort +0.8 -0.99 \ > .voyh-g.frq ------------------------ 368 z (z) = valid ------------------------ 671 b (b) = valid ------------------------ 438 e (e) = valid ------------------------ 177 r (r) = valid ------------------------ 556 a (ae) = valid 50 a (ak) = valid? 
436 a (am) = valid 489 a (an) = valid 428 a (ar) = valid ------------------------ 1151 o (oe) = valid 287 o (or) = valid 194 o (qoe) = valid 16 o (qor) = valid? ------------------------ 2051 g (bg) = valid 1696 g (g) = valid 20 g (og) = valid? ------------------------ 221 H (hz) = valid 67 H (hzc) = valid 1 H (hzcc) = negligible 1 H (chz) = misreading of "OHZ"? 8 H (ohz) = valid? 6 H (ohzc) = valid? 0 H (ohzcc) = non-occurring 3 H (qhz) = valid? 2 H (qhzc) = valid? 1 H (qhzcc) = negligible 2 H (qohz) = valid? 7 H (qohzc) = valid? 0 H (qohzcc) = non-occurring ------------------------ 498 h (h) = valid 163 h (hc) = valid 156 h (hcc) = valid 7 h (hccc) = misreading of "HCC"? 2 h (ch) = misreading of "OH"? 1 h (chc) = misreading of "OHC"? 1 h (ochcc) = misreading of "OHCC"? 378 h (oh) = valid 155 h (ohc) = valid 109 h (ohcc) = valid 3 h (ohccc) = misreading of "OHCC"? 7 h (qch) = misreading of "4OH"? 2 h (qchc) = misreading of "4OHC"? 1 h (qchcc) = misreading of "4OHCC"? 5 h (qh) = misreading of "4OH"? 0 h (qhc) = non-occurring 0 h (qhcc) = non-occurring 1 h (qhccc) = misreading of "4OHCC"? 1 h (qocchcc) = negligible 724 h (qoh) = valid 296 h (qohc) = valid 302 h (qohcc) = valid 6 h (qohccc) = negligible ------------------------ 13 P (pz) = valid 8 P (pzc) = valid 1 P (pzcc) = negligible 1 P (cpzc) = misreading of "OPZC"? 1 P (opz) = negligible 1 P (opzc) = negligible ------------------------ 131 p (p) = valid 2 p (pc) = negligible 2 p (pcc) = negligible 4 p (cp) = misreading of "OP"? 51 p (op) = valid 1 p (opc) = negligible 4 p (qp) = misreading of "4OP"? 28 p (qop) = valid ------------------------ 210 s (s) = valid 712 s (sc) = valid 149 s (scc) = valid 397 t (t) = valid 920 t (tc) = valid 124 t (tcc) = valid 2 t (tccc) = misreading of "TCC"? ------------------------ 15 x (c) = valid? mostly in (b)(c)(g) or (b)(c)(bg) 20 x (cc) = valid? misreading of "T"? 7 x (ccc) = valid? misreading of "TC"? ------------------------ 86 x (o) = valid? misreading of "OR" as "O2"? 
2 x (oc) = misreading of "CC" 5 x (occ) = misreading? 1 x (occc) = negligible 11 x (q) = misreading? 2 x (qc) = misreading of "4O"? 5 x (qcc) = valid? 2 x (qccc) = misreading of "4CC"? 29 x (qo) = valid? 0 x (qoc) = remarkable absence 7 x (qocc) = valid 2 x (qoccc) = misreading of "4OCC"? ------------------------ 1 ? (b)K(or) = negligible 1 ? (e)K(e) = negligible 1 ? (oh)K(oe) = negligible 2 ? K(z) = negligible Possible post-corrections: s/(t)(s)/(tcc)/g s/(o)(z)/(or)/g Ok, let's try to make some sense out of this. Voynichese seems to be made up of the following elements: (0) the "O*" groups: "OE" "4OE" "OR" "4OR" (2) The "A*" elements: "AE" "AK" "AM" "AN" "AR" "AIR" The "AIR" group above was accidentally converted to "AR" in the initial cleanup; that is why it doesn't appear in the tables. It is relatively rare (28 times, against 400 of "AR"), so it may really be a misreading of "AR". The "AK" element is also rare (43 occurrences), but not rare enough to be considered an error. (1) the isolated letters: "2" "8" "E" (when not attached to "O" or "A") "R" (3) The "G" elements: "8G" "G" The "OG" combination behaves like "8G", but is very rare: 19 occurrences, about 1% of "8G". So it is probably a misreading of "8G". (4) The "T/S" elements: "T" "TC" "TCC" "S" "SC" "SCC" The group "TCCC" occurs twice (while the rest occur hundreds of times), so it is probably a misreading of "TCC". (5) The "O" prefixes: "O" "4O" The prefixes "4" and "4C" occur very rarely; they are probably misreadings of "4O", and may be ignored. (6) The non-intruding two-legged gallows ("H" and "D"): "D" "DC" "DCC" "H" "HC" "HCC" The suffix "CCC" occurs very rarely, and is probably a misreading of "CC". (7) The intruding two-legged gallows ("HZ" and "DZ"): "DZ" "DZC" "HZ" "HZC" The suffix "CC" occurs once or twice, but may well be a misreading of "C". The absence of the "CC" suffix suggests that the base of the intruding gallows already counts as a "C" suffix. 
(8) The one-legged, non-intruding gallows ("P" and "F"): "P" "F" The suffixes "C" and "CC" occur a couple of times, but the frequency (even relative) is so low that we can ignore them. (9) The one-legged, intruding gallows ("PZ" and "FZ"): "PZ" "PZC" "FZ" "FZC" The "CC" suffix occurs only once and can be ignored. The groups "PZ" and "FZ" themselves are quite rare (25 occurrences total), so we could ignore them altogether. (10) Unattached "C"s: these are "C"s that cannot be parsed as part of other letters. "C" "CC" "CCC" There are about 40 occurrences total, and they seem to occur in special contexts. Note that "CC" and "CCC" may be misreadings of "T" and "TC", which are far more plentiful. Perhaps we should treat these elements as errors until we understand them better. There are a few sequences that cannot be parsed as above, and should be examined more closely. So, let's revise the parsing script. We will do these corrections (the numbers are the occurrences in the Bio section, Currier's transcription): s/CI/A/g # 11 s/IIIL/M/g # 24 s/CM/AN/g # 8 s/AL/AN/g # 12 s/A /G /g # 6 s/A2/AR/g # 7 s/AO/AE/g # 5 s/AI+E/AE/g # 6 s/AII+R/AIR/g # 4 s/4A/4O/g # 4 s/4CD/4OD/g # 7 s/4CH/4OH/g # 3 Also, after collecting the AI*[MNRKE] endings, any remaining "A"s are mapped to "O"s: s/A/O/g # 44 Let's look again at the FSG combinations that may occur adjacent to word and line breaks: cat .voyn.fsg \ | tr -d '=\012' \ | sed \ -e 's:^:/:' \ -e 's:$:/:' \ -e 's:///*:/:g' \ -e 's:/ *:/:g' \ -e 's: */:/:g' \ -e 's: *:_:g' \ | enum-trigraphs \ | grep -v ' ' \ > .voyn.tri cat .voyn.tri \ | tr '/' '_' \ | count-transition-freqs \ -v chars='_/COG8EDA4TSHRNM2ZPIKLF6*' \ > .voyn-word-end.frq Forward transition probabilities (× 99): count freq ntrpy pntpy _ C O G 8 E D A 4 T S H R N M 2 Z P I K L F 6 * -- ----- ----- ----- ----- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- H* 2 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 
R* 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 82 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . O2 7 0.000 0.592 0.000 85 . . . . . . 14 . . . . . . . . . . . . . . . . T2 6 0.000 0.650 0.000 83 . . . 17 . . . . . . . . . . . . . . . . . . . C4 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . A6 6 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . C6 2 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . O6 2 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . _6 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . IE 11 0.000 0.866 0.000 81 . . . . . . 9 . . . . . . . . . 9 . . . . . . KE 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . *G 2 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 2G 10 0.000 0.469 0.000 89 . . . . . . . . . . . . . . 10 . . . . . . . . 8G 2052 0.058 0.733 0.042 90 . 1 . . 1 1 . 3 . . 1 1 . . . . . . . . . . . AG 4 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . CG 842 0.024 0.638 0.015 91 . 1 . . 1 1 . 3 . . 1 . . . . . . . . . . . . DG 169 0.005 0.577 0.003 92 . 1 . 1 1 1 . 1 1 . 1 1 . . . . . . . . . . . EG 106 0.003 0.594 0.002 92 . 1 . 1 . 1 . 2 1 . 1 1 . . . . . . . . . . . HG 88 0.002 1.226 0.003 81 . 3 . 3 3 . . 1 1 1 2 1 . . 1 . . . . . . . . MG 6 0.000 0.650 0.000 83 . . . . . . . . . . 17 . . . . . . . . . . . . NG 2 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . PG 6 0.000 0.650 0.000 83 . . . . . 17 . . . . . . . . . . . . . . . . . RG 45 0.001 0.717 0.001 88 . . . 4 2 . . . . 2 2 . . . . . . . . . . . . TG 62 0.002 0.474 0.001 93 . 2 . 2 . . . . . . 2 . . . 2 . . . . . . . . ZG 204 0.006 0.640 0.004 90 . 2 . 2 . . . 2 . . . . . . 1 . . . . . . . . AK 43 0.001 0.159 0.000 97 . 2 . . . . . . . . . . . . . . . . . . . . . GK 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . IK 4 0.000 0.000 0.000 99 . . . . 
. . . . . . . . . . . . . . . . . . . OK 7 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . AL 12 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . IL 26 0.001 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . OL 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . AM 399 0.011 0.569 0.006 92 . 2 1 . . . . . 1 1 . . . . . . . . . . . . . CM 8 0.000 0.544 0.000 87 . . . . . . . . . 12 . . . . . . . . . . . . . DM 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . *N 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . AN 475 0.013 0.577 0.008 91 . 3 . 1 . . . . 1 1 . . . . . . . . . . . . . ON 7 0.000 0.592 0.000 85 . 14 . . . . . . . . . . . . . . . . . . . . . *O 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . KO 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . CR 8 0.000 0.544 0.000 87 . . 12 . . . . . . . . . . . . . . . . . . . . SR 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . ** 3 0.000 1.585 0.000 33 . . 33 . . . . . . . . . . . . . . 33 . . . . . A* 5 0.000 1.371 0.000 59 . 20 . 20 . . . . . . . . . . . . . . . . . . . C* 10 0.000 2.646 0.001 30 . . 10 20 . 10 10 . . . . . 10 . . . . . . . . . 10 D* 2 0.000 1.000 0.000 50 . . . . . . . . . 50 . . . . . . . . . . . . . T* 2 0.000 1.000 0.000 50 . . . 50 . . . . . . . . . . . . . . . . . . . A2 7 0.000 1.149 0.000 71 . 14 14 . . . . . . . . . . . . . . . . . . . . C2 45 0.001 1.482 0.002 73 4 9 4 2 . . 4 . . . 2 . . . . . . . . . . . . E2 16 0.000 1.649 0.001 50 . 6 12 . . . 31 . . . . . . . . . . . . . . . . A8 8 0.000 1.406 0.000 37 . . 50 . . . . . . . . 12 . . . . . . . . . . . M8 2 0.000 1.000 0.000 50 . . . . . . 50 . . . . . . . . . . . . . . . . 4A 4 0.000 1.500 0.000 25 . . . . 50 . . . . . . 25 . . . . . . . . . . . AD 4 0.000 2.000 0.000 25 . . 25 . . . 25 . . . . . . . . 25 . . . . . . . 8E 10 0.000 1.685 0.000 50 . . . . . . . 10 10 30 . . 
. . . . . . . . . . . AE 552 0.015 1.869 0.029 67 . 6 7 4 . 1 1 1 5 4 1 . . . 1 . . . . . . . . CE 4 0.000 0.811 0.000 74 . . . . . . . . . 25 . . . . . . . . . . . . . DE 11 0.000 1.868 0.001 54 . 9 . . . . 9 . 18 . . 9 . . . . . . . . . . . HE 3 0.000 0.918 0.000 33 . 66 . . . . . . . . . . . . . . . . . . . . . LE 3 0.000 0.918 0.000 66 . . . . . 33 . . . . . . . . . . . . . . . . . OE 1344 0.038 2.638 0.100 44 1 5 4 3 . 16 2 . 11 7 2 . . . . . 1 . . . . . . SE 5 0.000 1.371 0.000 59 . . . 20 . . . . 20 . . . . . . . . . . . . . . TE 13 0.000 2.035 0.001 53 . . . 8 . . 8 . 15 . . 8 . . 8 . . . . . . . . CF 3 0.000 1.585 0.000 33 33 . . . . . . . 33 . . . . . . . . . . . . . . OG 14 0.000 1.807 0.001 57 . 7 . . . 14 . . 7 14 . . . . . . . . . . . . . SG 28 0.001 1.232 0.001 78 . 4 . 4 . 7 . . . 4 4 . . . . . . . . . . . . EK 2 0.000 1.000 0.000 50 . . . . 50 . . . . . . . . . . . . . . . . . . OM 14 0.000 0.946 0.000 78 . 14 7 . . . . . . . . . . . . . . . . . . . . 8R 2 0.000 1.000 0.000 50 . 50 . . . . . . . . . . . . . . . . . . . . . AR 395 0.011 1.284 0.014 79 . 6 5 1 . . 4 1 2 2 . . . . . . . . . . . . . ER 13 0.000 1.145 0.000 76 . 8 . . . . 8 . 8 . . . . . . . . . . . . . . IR 43 0.001 1.258 0.002 76 . 2 7 . . . 9 . 2 . . . . . . . . . . . . . 2 OR 302 0.008 1.699 0.014 67 1 9 5 . . . 10 . 3 4 . . . . . . . . . . . . . TR 4 0.000 1.000 0.000 50 . . . 50 . . . . . . . . . . . . . . . . . . . O* 2 0.000 1.000 0.000 . . . . . . . 50 . . . . . . . . . . . . . . . 50 _* 10 0.000 2.646 0.001 10 30 . . 10 . . 20 . 10 10 . . . . . . . . . . . . 10 42 2 0.000 1.000 0.000 . . 50 . . . . 50 . . . . . . . . . . . . . . . . G2 16 0.000 2.483 0.001 19 6 12 . . . . 37 . 12 . 6 . . . . . 6 . . . . . . H2 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . M2 2 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . N2 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . R2 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . 
. . . . . . . S2 1 0.000 0.000 0.000 . . . . 99 . . . . . . . . . . . . . . . . . . . Z2 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . _2 265 0.007 1.937 0.014 3 . 40 1 . . 2 42 . 4 5 . . . . . . . . . . . . . 84 2 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . E4 9 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . G4 91 0.003 0.174 0.000 . . 97 1 . . . . . . . . . . . 1 . . . . . . . . M4 1 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . N4 1 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . O4 8 0.000 0.544 0.000 . 12 87 . . . . . . . . . . . . . . . . . . . . . R4 3 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . _4 1560 0.044 0.254 0.011 . 1 96 . . . . . . . . . . . . . . . . . . . . . R6 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . *8 5 0.000 0.722 0.000 . . . 79 . . . 20 . . . . . . . . . . . . . . . . 28 3 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . 88 3 0.000 0.918 0.000 . . . 66 . 33 . . . . . . . . . . . . . . . . . . C8 1897 0.053 0.543 0.029 3 . . 91 . . . 4 . . . . . . . . . . . . . . . . D8 2 0.000 1.000 0.000 . . . 50 . 50 . . . . . . . . . . . . . . . . . . E8 87 0.002 1.323 0.003 3 . 2 71 1 . . 19 . 1 1 . . . . . . . . . . . . . G8 35 0.001 2.220 0.002 3 6 11 28 . . . 42 3 3 3 . . . . . . . . . . . . . H8 3 0.000 1.585 0.000 . . . 33 33 . . 33 . . . . . . . . . . . . . . . . N8 5 0.000 0.722 0.000 . . . 20 . . . 79 . . . . . . . . . . . . . . . . O8 32 0.001 1.917 0.002 9 6 . 46 . . . 28 . 9 . . . . . . . . . . . . . . P8 3 0.000 1.585 0.000 . . 33 33 . . . 33 . . . . . . . . . . . . . . . . R8 6 0.000 1.252 0.000 . . . 66 17 . . 17 . . . . . . . . . . . . . . . . S8 40 0.001 0.619 0.001 2 2 . 89 . . . 5 . . . . . . . . . . . . . . . . T8 96 0.003 0.850 0.002 3 . 2 83 . . . 11 . . . . . . . . . . . . . . . . Z8 21 0.001 0.723 0.000 5 . 9 85 . . . . . . . . . . . . . . . . . . . . 
_8 492 0.014 2.034 0.028 1 2 11 12 . 1 1 58 . 6 7 . . . . . . . . . . . . . *A 4 0.000 0.811 0.000 . . . . . 25 . . . . . . . 74 . . . . . . . . . . 2A 131 0.004 2.370 0.009 . . . . . 22 1 . . . . . 17 23 28 1 . . 5 1 1 . . . 6A 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . . 99 . . . . 8A 423 0.012 2.373 0.028 1 . . . . 29 . . . . . . 27 12 22 . . . 3 2 . . . 1 AA 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . CA 55 0.002 1.921 0.003 2 . . 2 2 45 . . . . . . 38 4 2 . . . . 4 2 . . . DA 742 0.021 2.317 0.048 . . . . . 23 . . . . . . 13 36 20 . . . 2 1 1 . 1 . EA 55 0.002 2.583 0.004 . . 2 . . 22 . 2 . . . . 18 22 22 . . . 9 4 . . . . FA 2 0.000 1.000 0.000 . . . . . 50 . . . . . . . . 50 . . . . . . . . . GA 3 0.000 0.918 0.000 . . . . . . . . . . . . 66 . 33 . . . . . . . . . HA 259 0.007 2.229 0.016 . . . . . 32 . . . . . . 23 25 14 1 . . 2 1 . . . . MA 1 0.000 0.000 0.000 . . . . . . . . . . . . 99 . . . . . . . . . . . NA 2 0.000 1.000 0.000 . . 50 . . . . . . . . . . . 50 . . . . . . . . . OA 3 0.000 1.585 0.000 . . . . . 33 . . . . . . 33 . 33 . . . . . . . . . PA 14 0.000 1.985 0.001 7 . . . . 42 . . . . . . 28 7 14 . . . . . . . . . RA 96 0.003 2.588 0.007 . . . . . 24 . . . . . 1 19 19 24 . . . 6 4 2 . 1 . SA 21 0.001 1.891 0.001 . . . . . 42 . . . . . . 38 5 5 . . . . 5 5 . . . TA 26 0.001 1.587 0.001 . . . . . 53 . . . . . . 27 4 . . . . . 15 . . . . ZA 9 0.000 1.224 0.000 . . . . . 66 . . . . . . 22 11 . . . . . . . . . . _A 128 0.004 2.570 0.009 . . . 1 1 29 1 . . . 1 2 19 12 26 1 . . 2 4 . . . . *C 3 0.000 1.585 0.000 . 33 . 33 33 . . . . . . . . . . . . . . . . . . . 2C 4 0.000 1.500 0.000 . . . 25 . . . . . . . . . . . 25 . . 50 . . . . . 4C 20 0.001 2.119 0.001 . 35 . 5 5 . 35 . . . . 15 . . . . . . . . . . . 5 8C 19 0.001 2.180 0.001 . 47 . 10 16 . . 5 . . . . . . 10 . . . 10 . . . . . CC 954 0.027 1.830 0.049 . 3 2 34 52 . 3 1 . . . 1 1 . . 2 . . . . . . . . DC 873 0.025 1.704 0.042 . 52 2 8 34 . . 1 . 1 1 . . . 
1 . . . . . . . . . EC 11 0.000 1.278 0.000 9 72 . . 9 . 9 . . . . . . . . . . . . . . . . . FC 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . GC 2 0.000 1.000 0.000 . . . . . . . . . . . . . . . . . 50 . . . 50 . . HC 343 0.010 1.744 0.017 . 38 3 10 46 . . . . 1 1 . . . . . . . . . . . . . OC 19 0.001 0.880 0.000 5 83 . . 5 . 5 . . . . . . . . . . . . . . . . . PC 3 0.000 0.918 0.000 . 66 . . . . . . . 33 . . . . . . . . . . . . . . RC 4 0.000 1.000 0.000 . 50 . . 50 . . . . . . . . . . . . . . . . . . . SC 868 0.024 2.346 0.057 . 17 7 17 44 . 7 2 . . . 3 . . . . . 1 . . . . . . TC 1054 0.030 2.309 0.068 . 12 6 19 48 . 5 2 . . . 3 . . . 1 . . . . . . . . ZC 96 0.003 1.515 0.004 . 3 1 56 34 1 . . . . . . . . . 3 . . 1 . . . . . _C 23 0.001 2.359 0.002 . 52 . . 4 4 9 4 . . . 4 . . . 4 . 13 . . . . . 4 *D 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . 99 . . . . . . . 2D 5 0.000 1.371 0.000 . 20 . . . . . 59 . . . . . . . . 20 . . . . . . . 4D 4 0.000 1.500 0.000 . . 50 . . . . 25 . . . . . . . . 25 . . . . . . . 8D 8 0.000 0.544 0.000 . 87 . . . . . . . . 12 . . . . . . . . . . . . . CD 155 0.004 1.996 0.009 3 11 . 31 . . . 8 . 2 . . . . . . 44 . 1 . . . . . ED 272 0.008 1.785 0.014 . 50 3 8 . 1 . 32 . 3 1 . . . . . . . . . . . . . GD 75 0.002 2.039 0.004 . 49 5 8 . . . 26 . 4 4 . . . . . 3 . . . . . . . HD 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . OD 1435 0.040 1.847 0.074 . 44 3 6 . . . 40 . 3 1 . . . . . 1 . . . . . . . RD 2 0.000 1.000 0.000 . 50 50 . . . . . . . . . . . . . . . . . . . . . SD 45 0.001 0.806 0.001 . 2 . 7 . . . 2 . 2 . . . . . . 86 . . . . . . . TD 83 0.002 0.982 0.002 . 7 . 1 . . . 12 . . . . . . . . 79 . . . . . . . _D 102 0.003 2.491 0.007 2 32 16 . . . . 28 . 8 5 . . . . . 6 . 2 . . . . 1 2E 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . 99 . . . . . . . . EE 8 0.000 2.406 0.001 12 . 12 . 12 . 12 12 . 37 . . . . . . . . . . . . . . GE 36 0.001 2.824 0.003 14 . 11 8 3 . 11 6 . 36 6 3 . 
. . . . . . 3 . . . . RE 1 0.000 0.000 0.000 . . . . . . . . . . 99 . . . . . . . . . . . . . _E 350 0.010 2.852 0.028 2 1 18 3 5 1 12 3 . 33 16 2 1 . . 1 . 1 . . . 1 . . 4F 1 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . EF 6 0.000 0.650 0.000 . . 17 . . . . . . 83 . . . . . . . . . . . . . . GF 2 0.000 0.000 0.000 . . . . . . . . . 99 . . . . . . . . . . . . . . OF 13 0.000 1.352 0.000 . . 8 . . . . 8 . 69 15 . . . . . . . . . . . . . TF 3 0.000 0.918 0.000 . . . . . . . 33 . . . . . . . . 66 . . . . . . . _F 8 0.000 0.811 0.000 . . . . . . . . . 74 . . . . . . 25 . . . . . . . 4G 3 0.000 1.585 0.000 . . . . . . 33 . . . . 33 33 . . . . . . . . . . . GG 1 0.000 0.000 0.000 . . . . . . . . . . . 99 . . . . . . . . . . . . LG 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . 99 . . . . . . . . _G 134 0.004 2.889 0.011 4 1 2 . 6 2 30 1 1 13 17 19 1 . . 1 . . . . . 1 . . 2H 3 0.000 1.585 0.000 . . 33 . . . . . . 33 . . . . . . 33 . . . . . . . 4H 5 0.000 0.000 0.000 . . . . . . . . . . . . . . . . 99 . . . . . . . 8H 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . AH 4 0.000 1.500 0.000 . 25 25 . . . . . . . . . . . . . 50 . . . . . . . CH 80 0.002 1.724 0.004 . 10 . 32 . . . 7 . . . . . . . 1 48 . . . . . . . EH 40 0.001 2.120 0.002 2 45 . 2 2 2 . 30 . 5 10 . . . . . . . . . . . . . GH 59 0.002 1.775 0.003 . 49 3 5 . . . 30 . 12 . . . . . . . . . . . . . . OH 570 0.016 2.143 0.034 1 42 4 9 . . . 34 . 5 2 . . . . . 1 . 1 . . . . . RH 2 0.000 1.000 0.000 . . . . . . . 50 . . . . . . . . 50 . . . . . . . SH 25 0.001 1.145 0.001 . . 12 8 . . . 4 . . . . . . . . 75 . . . . . . . TH 39 0.001 1.123 0.001 . 5 3 8 . . . 5 . . . . . . . . 79 . . . . . . . _H 145 0.004 2.606 0.011 . 29 17 2 . . 1 18 . 16 5 . . . . . 11 . 1 . . . . . *I 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . 99 . . . . . 2I 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . 99 . . . . . 8I 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . 
99 . . . . . AI 51 0.001 1.311 0.002 . . . . . 8 . . . . . . 50 . . . . . 41 . . . . . CI 11 0.000 1.495 0.000 . . . . . 18 . . . . . . 36 . . . . . 45 . . . . . DI 3 0.000 0.918 0.000 . . . . . . . . . . . . 33 . . . . . 66 . . . . . GI 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . HI 4 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . 99 . . . . . II 71 0.002 1.885 0.004 . . . . . 4 . . . . . . 14 . . . . . 39 6 36 . . . OI 9 0.000 0.764 0.000 . . . . . . . . . . . . 22 . . . . . 77 . . . . . RI 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . 99 . . . . . _I 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . 8L 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . CL 2 0.000 1.000 0.000 . . . 50 . 50 . . . . . . . . . . . . . . . . . . _L 2 0.000 1.000 0.000 . . 50 . . 50 . . . . . . . . . . . . . . . . . . 2O 116 0.003 1.489 0.005 . . . . 1 70 6 . . . . 1 15 1 3 . . . 2 . . 1 . . 4O 1629 0.046 1.574 0.072 . 1 . . 1 12 66 . . . . 16 1 . . . . 1 . . . . . . 8O 72 0.002 1.012 0.002 . . . . . 74 3 . . . . 1 21 . . . . . . . . . . . AO 5 0.000 2.322 0.000 20 . . 20 20 20 . . . . . . . . . 20 . . . . . . . . CO 172 0.005 1.126 0.005 . . . 2 1 75 3 . 1 . . . 17 . . . . . . . . . . . DO 80 0.002 1.726 0.004 1 1 . 2 4 66 . . . . . 2 17 . 2 1 . 1 . . . . . . EO 175 0.005 2.321 0.011 6 2 . . 1 42 5 . . 1 . 1 33 1 1 1 . 1 2 3 . . 1 . FO 3 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . GO 54 0.002 2.272 0.003 . . . 2 2 33 26 . . . . 24 9 . . 2 . . . . . 2 . . HO 58 0.002 1.144 0.002 . . . . . 73 2 . . . . 3 19 . . . . . . . 2 . . . LO 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . . 99 . . . . MO 11 0.000 1.677 0.001 . 9 . . . 36 45 . . . . 9 . . . . . . . . . . . . NO 15 0.000 1.237 0.001 7 . . . . 73 7 . . . . 13 . . . . . . . . . . . . OO 4 0.000 1.500 0.000 . . . . . 50 25 . . . . . 25 . . . . . . . . . . . PO 50 0.001 1.383 0.002 . 2 . . . 75 4 . . . 2 2 10 . 2 . . . . . . . 
. 2 RO 87 0.002 1.770 0.004 2 . . . 1 63 1 . . . . 3 20 1 3 . . . 2 1 . . . . SO 37 0.001 1.436 0.001 3 . . . . 67 16 . . . . . 11 . . . . 3 . . . . . . TO 49 0.001 1.735 0.002 . . . 4 4 63 4 . . . . 4 18 . . 2 . . . . . . . . ZO 10 0.000 2.046 0.001 10 . . 10 10 30 . . . . . . 40 . . . . . . . . . . . _O 1279 0.036 2.250 0.081 . . . . 1 41 23 . . . . 21 7 . . . . 3 . . . . . . 2P 1 0.000 0.000 0.000 . . . . . . . . . 99 . . . . . . . . . . . . . . 4P 2 0.000 1.000 0.000 . . 50 . . . . . . 50 . . . . . . . . . . . . . . AP 2 0.000 1.000 0.000 . . 50 . . . . . . . . . . . . . 50 . . . . . . . CP 17 0.000 2.469 0.001 6 . 12 6 . . . 12 . 35 6 . . . . . 23 . . . . . . . EP 11 0.000 1.673 0.001 9 9 9 9 . . . . . 63 . . . . . . . . . . . . . . GP 3 0.000 1.585 0.000 . . 33 . . . . 33 . . 33 . . . . . . . . . . . . . OP 68 0.002 2.106 0.004 1 1 9 4 1 . . 10 . 54 16 . . . . . 1 . . . . . . . RP 2 0.000 1.000 0.000 . 50 . . . . . . . 50 . . . . . . . . . . . . . . TP 12 0.000 1.252 0.000 . . . . . . . . . 17 17 . . . . . 66 . . . . . . . _P 98 0.003 2.039 0.006 . . 38 1 2 . . 4 . 36 10 . . . . . 7 . . . . . . . GR 22 0.001 2.772 0.002 23 . 27 18 5 5 . 5 . 9 5 . . . . . . 5 . . . . . . _R 125 0.004 2.415 0.008 10 . 21 2 . . . 36 . 17 11 1 . . . . . . 1 . . . 1 . *S 2 0.000 1.000 0.000 . 50 . 50 . . . . . . . . . . . . . . . . . . . . 2S 13 0.000 0.773 0.000 . 84 8 . . . 8 . . . . . . . . . . . . . . . . . 4S 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . 8S 38 0.001 0.690 0.001 . 86 8 . 5 . . . . . . . . . . . . . . . . . . . AS 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . CS 9 0.000 2.113 0.001 11 33 11 33 11 . . . . . . . . . . . . . . . . . . . DS 29 0.001 0.788 0.001 . 82 . 3 14 . . . . . . . . . . . . . . . . . . . ES 191 0.005 1.136 0.006 1 81 2 3 8 1 1 2 . . . 2 . . . 1 . . . . . . . . FS 2 0.000 1.000 0.000 . . . 50 50 . . . . . . . . . . . . . . . . . . . GS 37 0.001 1.186 0.001 . 78 3 . . 3 11 3 . 3 . . . . . . . . . . 
. . . . HS 25 0.001 0.951 0.001 . 75 . 4 20 . . . . . . . . . . . . . . . . . . . MS 6 0.000 0.650 0.000 . 83 . . . . 17 . . . . . . . . . . . . . . . . . NS 6 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . OS 9 0.000 0.503 0.000 . 88 . 11 . . . . . . . . . . . . . . . . . . . . PS 25 0.001 1.514 0.001 . 67 8 . 12 . . 8 . 4 . . . . . . . . . . . . . . RS 32 0.001 0.201 0.000 . 96 . . 3 . . . . . . . . . . . . . . . . . . . TS 2 0.000 1.000 0.000 . 50 . 50 . . . . . . . . . . . . . . . . . . . . _S 650 0.018 1.235 0.023 . 80 4 2 1 . 6 2 . . . 3 . . . . . . . . . . . . *T 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . 2T 14 0.000 1.435 0.001 . 71 . 7 7 . 7 . . . . 7 . . . . . . . . . . . . 8T 37 0.001 1.511 0.002 . 67 11 11 8 . 3 . . . . . . . . . . . . . . . . . CT 15 0.000 2.042 0.001 . 33 7 33 20 . 7 . . . . . . . . . . . . . . . . . DT 69 0.002 1.628 0.003 . 49 1 16 32 . . . . . . . . . . 1 . . . . . . . . ET 323 0.009 1.305 0.012 . 78 2 6 7 1 1 1 . . . 1 . . . 1 . 1 . . . . . . FT 23 0.001 0.927 0.001 . 82 4 . 9 . . 4 . . . . . . . . . . . . . . . . GT 31 0.001 0.612 0.001 . 89 . . 3 . 3 3 . . . . . . . . . . . . . . . . HT 61 0.002 2.179 0.004 . 52 6 11 16 2 . 6 . . . 2 2 . . . . . . . . . . 2 MT 4 0.000 1.500 0.000 . 50 . . . . 25 . . . . 25 . . . . . . . . . . . . NT 5 0.000 1.371 0.000 . 59 . . . 20 20 . . . . . . . . . . . . . . . . . OT 8 0.000 1.061 0.000 . 74 . 12 . . . 12 . . . . . . . . . . . . . . . . PT 91 0.003 1.233 0.003 . 76 8 5 8 . . 1 . . . . . . . . . . . . . 1 . . RT 42 0.001 0.866 0.001 . 85 . 5 . . 2 . . . . . 2 . . 5 . . . . . . . . ST 3 0.000 0.918 0.000 . 66 . . 33 . . . . . . . . . . . . . . . . . . . TT 1 0.000 0.000 0.000 . . . . . . . . . . . 99 . . . . . . . . . . . . ZT 2 0.000 1.000 0.000 . 50 . 50 . . . . . . . . . . . . . . . . . . . . _T 723 0.020 1.622 0.033 . 72 4 1 3 1 10 2 . . . 4 . . . . . 1 . . . . . . DZ 199 0.006 1.476 0.008 . 26 1 62 5 . . 2 . 1 . . . . . . . . . . . . . . 
FZ 4 0.000 1.000 0.000 . 50 . . 50 . . . . . . . . . . . . . . . . . . . HZ 121 0.003 1.558 0.005 1 26 4 61 5 . . 2 . . . . . . . 1 . . . . . . . . PZ 21 0.001 2.064 0.001 . 42 9 24 14 . . 9 . . . . . . . . . . . . . . . . *_ 13 0.000 2.288 0.001 . . 38 . . . 8 . 23 15 8 . 8 . . . . . . . . . . . 2_ 70 0.002 2.839 0.006 . . 28 1 3 1 . 25 7 7 11 6 . . . 7 . 1 . . . . . . 4_ 5 0.000 1.922 0.000 . . . . . . 40 . 20 . . 20 . . . . . . . . . 20 . . 6_ 11 0.000 2.482 0.001 . . 9 . 18 . . . 27 . 18 9 . . . 18 . . . . . . . . 8_ 70 0.002 2.624 0.005 . . 30 6 3 8 . . 33 6 7 3 . . . 3 . . . . . 1 . . A_ 6 0.000 2.252 0.000 . . 17 . 17 . 17 . 33 . 17 . . . . . . . . . . . . . C_ 7 0.000 2.236 0.000 . . 14 14 28 . . . 28 . . . 14 . . . . . . . . . . . D_ 12 0.000 2.855 0.001 . 17 8 8 8 . . 17 . 25 8 . 8 . . . . . . . . . . . E_ 1028 0.029 3.265 0.094 . 1 20 2 11 4 4 2 13 17 15 3 1 . . 5 . 2 . . . . . . F_ 1 0.000 0.000 0.000 . . . . . . . . . 99 . . . . . . . . . . . . . . G_ 3309 0.093 2.923 0.271 . . 16 2 9 9 1 1 38 6 5 3 3 . . 4 . 2 . . . . . . H_ 4 0.000 1.500 0.000 . . 50 . . . . . . 25 25 . . . . . . . . . . . . . K_ 55 0.002 2.698 0.004 . . 14 5 16 2 . . 18 4 4 2 . . . 32 . 2 . . . . . . L_ 39 0.001 2.462 0.003 . . 28 3 . . . . 10 28 20 3 . . . 5 . 3 . . . . . . M_ 389 0.011 2.634 0.029 . . 29 2 7 1 1 1 6 27 21 . . . . 3 . 1 . . . . . . N_ 446 0.013 2.548 0.032 . 1 34 2 6 1 . 2 5 24 21 1 . . . 2 . 1 . . . . . . O_ 31 0.001 3.046 0.003 . . 26 6 3 10 3 . 22 6 3 6 3 . . 10 . . . . . . . . P_ 3 0.000 1.585 0.000 . 33 33 . . . . . . 33 . . . . . . . . . . . . . . R_ 589 0.017 2.809 0.046 . . 33 3 4 1 1 10 7 15 21 1 . . . 2 . 2 . . . . . 1 S_ 4 0.000 1.500 0.000 . . 25 . 25 . . . 50 . . . . . . . . . . . . . . . T_ 1 0.000 0.000 0.000 . . . . . . . . . . 99 . . . . . . . . . . . . . Z_ 2 0.000 1.000 0.000 . . 50 . . . . . . . 50 . . . . . . . . . . . . . __ 1 0.000 0.000 0.000 . . . . . . 99 . . . . . . . . . . . . . . . . . 
----- ----- ----- ----- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- TOT 35620 1.000 1.767 1.767 17 12 11 11 8 7 6 6 5 4 3 3 3 1 1 1 1 1 0 0 0 0 0 0 Looking at this table we can see that these frequent digraphs are almost always (>80%) word-final A6 C6 O6 IE 2G 8G CG DG EG HG PG RG TG SG ZG AK AL IL AM AN and these are quite often (>10%) word-final C2 E2 G2 8E AE DE HE OE SE TE OG OM AR ER IR OR GR _R and these are almost never word-final: .2 .4 .8 .A .C .D GE _E .F .H .I .O .P .S .T .Z These are the additional breaks we add if we assume these letters are always word-final: .M 33 .N 37 .K 2 .L 5 .6 1 .G 470 .R 326 .E 1325 If we assume that G is always word-final, we will add about 470 new word breaks. Of these, 190 will be after "8G" 70 after "CG", and 130 after _G. We could fix this problem by correcting " GH" --> " OH" " GD" --> " OD" "4G" --> "4O" and then breaking after all remaining "G"s. It doesn't seem possible to break automatically after "R" or "E", without changing substantially the set of words. Here is the analogous table for backwards transitions: cat .voyn.tri \ | tr '/' '_' \ | sed -e 's/\(.\)\(..\)/\2\1/g' \ | count-transition-freqs \ -v chars='_/COG8EDA4TSHRNM2ZPIKLF6*' \ > .voyn-word-beg.frq Transition probabilities (× 99): count freq ntrpy pntpy _ C O G 8 E D A 4 T S H R N M 2 Z P I K L F 6 * -- ----- ----- ----- ----- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- ** 3 0.000 1.585 0.000 33 33 33 . . . . . . . . . . . . . . . . . . . . . *8 5 0.000 1.922 0.000 20 40 . . . . . 20 . 20 . . . . . . . . . . . . . . *A 4 0.000 1.500 0.000 50 25 25 . . . . . . . . . . . . . . . . . . . . . *C 3 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . *D 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . *G 2 0.000 1.000 0.000 . 50 . . . . . . . . . . . . . . . . . . . . . 50 *I 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . . . . . . 99 *N 1 0.000 0.000 0.000 . 
99 . . . . . . . . . . . . . . . . . . . . . . *O 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . *S 2 0.000 1.000 0.000 50 . . . . . 50 . . . . . . . . . . . . . . . . . *T 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . *_ 13 0.000 2.815 0.001 8 23 . . . . 8 23 . 8 . 15 8 . . . . . . . . . . 8 28 3 0.000 1.585 0.000 . 33 . . . . . . . 33 33 . . . . . . . . . . . . . 2A 131 0.004 0.976 0.004 85 2 1 5 . 4 . . 1 . . 1 1 . 2 . . . . . . . . . 2C 4 0.000 1.500 0.000 25 50 . 25 . . . . . . . . . . . . . . . . . . . . 2D 5 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 2E 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 2G 10 0.000 2.446 0.001 30 20 . . . 20 . 10 . . . . . 10 . . 10 . . . . . . . 2H 3 0.000 1.585 0.000 33 33 . 33 . . . . . . . . . . . . . . . . . . . . 2I 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 2O 116 0.003 0.553 0.002 91 3 . 2 . 1 . 1 1 . . . . . . . . . . . . . . . 2P 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . 2S 13 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 2T 14 0.000 0.592 0.000 85 . . 14 . . . . . . . . . . . . . . . . . . . . 2_ 70 0.002 2.380 0.005 13 47 8 4 1 11 . 7 . 7 . . . . . . . . . . . . . . 42 2 0.000 1.000 0.000 50 . . 50 . . . . . . . . . . . . . . . . . . . . 4A 4 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 4C 20 0.001 0.286 0.000 94 . 5 . . . . . . . . . . . . . . . . . . . . . 4D 4 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 4F 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 4G 3 0.000 0.918 0.000 66 . . 33 . . . . . . . . . . . . . . . . . . . . 4H 5 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 4O 1629 0.046 0.442 0.020 92 . . 5 . 1 . . . . . . . . . . . . . . . . . . 4P 2 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 4S 1 0.000 0.000 0.000 99 . . . . . . . . . . . 
. . . . . . . . . . . . 4_ 5 0.000 0.722 0.000 79 20 . . . . . . . . . . . . . . . . . . . . . . 6A 1 0.000 0.000 0.000 . . . . . . . . . . . . 99 . . . . . . . . . . . 6_ 11 0.000 1.686 0.001 9 18 18 . . . . 54 . . . . . . . . . . . . . . . . 82 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . 84 2 0.000 1.000 0.000 . 50 . 50 . . . . . . . . . . . . . . . . . . . . 88 3 0.000 1.585 0.000 . . . . . 33 . . . . . 33 33 . . . . . . . . . . . 8A 423 0.012 1.623 0.019 68 17 2 4 . 4 . . . 3 . . . 1 . . . . . . . . . . 8C 19 0.001 1.925 0.001 47 26 10 10 . . . . . . 5 . . . . . . . . . . . . . 8D 8 0.000 0.811 0.000 74 25 . . . . . . . . . . . . . . . . . . . . . . 8E 10 0.000 1.571 0.000 59 20 . . 10 . 10 . . . . . . . . . . . . . . . . . 8G 2052 0.058 1.029 0.059 3 84 1 . . 3 . . . 4 2 . . . . . 1 . . . . . . . 8H 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 8I 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . 8L 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . 8O 72 0.002 1.426 0.003 73 11 . 6 . 3 . . . 3 . . . . . . 3 1 . . . . . . 8R 2 0.000 1.000 0.000 50 . . . . . . 50 . . . . . . . . . . . . . . . . 8S 38 0.001 0.524 0.001 91 3 . 3 . 3 . . . . . . . . . . . . . . . . . . 8T 37 0.001 1.227 0.001 75 11 8 3 . 3 . . . . . . . . . . . . . . . . . . 8_ 70 0.002 1.657 0.003 4 72 4 1 . 4 . 4 . 4 1 . . . 1 . 1 . . . . . . . A* 5 0.000 0.971 0.000 . . . . 59 . 40 . . . . . . . . . . . . . . . . . A2 7 0.000 1.842 0.000 14 . . . . . 42 . . . . 28 . . . 14 . . . . . . . . A6 6 0.000 1.252 0.000 . . . . . . 66 . . . . 17 17 . . . . . . . . . . . A8 8 0.000 2.156 0.000 12 12 . . 25 . 37 . . . . 12 . . . . . . . . . . . . AA 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . AD 4 0.000 1.500 0.000 25 . . . 50 . . . . . . . . . . 25 . . . . . . . . AE 552 0.015 2.897 0.045 7 4 . . 22 2 31 . . 3 2 15 4 . . 5 1 1 . . . . . . AG 4 0.000 1.500 0.000 25 25 . . . . 50 . . . . . . . . . 
. . . . . . . . AH 4 0.000 0.811 0.000 74 . . . . . . . . . . . 25 . . . . . . . . . . . AI 51 0.001 2.643 0.004 6 . . . 21 10 27 . . . . 10 12 . . 14 . . . . . . . . AK 43 0.001 2.987 0.004 12 5 . . 23 5 25 . . 9 2 5 9 . . 2 . . . . . . 2 . AL 12 0.000 2.451 0.001 . 8 . . 8 . 41 . . . 8 8 17 . . 8 . . . . . . . . AM 399 0.011 2.516 0.028 8 . . . 24 3 38 . . . . 9 6 . . 9 . . . . . . . . AN 475 0.013 2.101 0.028 3 . . . 11 3 56 . . . . 14 4 . . 6 . . . . . . . 1 AO 5 0.000 1.371 0.000 . . . . . 20 59 . . . . . . 20 . . . . . . . . . . AP 2 0.000 1.000 0.000 . . . . 50 . 50 . . . . . . . . . . . . . . . . . AR 395 0.011 2.906 0.032 6 5 . 1 29 3 24 . . 2 2 15 5 . . 6 1 1 . . . . . . AS 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . A_ 6 0.000 1.792 0.000 . 17 . . 50 . . . 17 . . . . . . . . 17 . . . . . . C* 10 0.000 2.446 0.001 10 10 . . . . 20 . 10 30 20 . . . . . . . . . . . . . C2 45 0.001 2.131 0.003 2 44 . . . . 4 . . 31 7 2 . . . 2 7 . . . . . . . C4 1 0.000 0.000 0.000 . . . . . . . . . . 99 . . . . . . . . . . . . . C6 2 0.000 1.000 0.000 . . . . . . . . . 50 . 50 . . . . . . . . . . . . C8 1897 0.053 2.359 0.126 . 26 . . . . 15 . . 27 20 8 . . . . 2 . . . . . . . CA 55 0.002 2.095 0.003 2 18 . . 2 . 14 . . 38 25 . . . . . . . . . . . . . CC 954 0.027 2.296 0.061 1 3 2 . 1 1 47 . 1 13 16 14 . . . . . . . . . . . . CD 155 0.004 1.876 0.008 1 17 1 . . 1 . . 4 36 39 . . . . . . . . . . . . . CE 4 0.000 2.000 0.000 25 25 . . . . . . . . . 25 . . . . 25 . . . . . . . CF 3 0.000 1.585 0.000 . 33 . 33 . . . . . . 33 . . . . . . . . . . . . . CG 842 0.024 2.274 0.054 . 38 . . . . 8 . . 24 18 4 . . . . 6 . . . . . . . CH 80 0.002 1.747 0.004 1 17 . . . . . . 4 43 33 . . . . . . . . . . . . . CI 11 0.000 2.914 0.001 . 9 . . 18 . 9 . . 9 18 9 . . . 18 9 . . . . . . . CL 2 0.000 1.000 0.000 . . . . . . . . . 50 50 . . . . . . . . . . . . . CM 8 0.000 1.299 0.000 . . . . 25 . 62 . . . . 12 . . . . . . . . . . . . CO 172 0.005 1.966 0.009 . 9 . . . . 
9 . . 40 35 6 . . . . 1 . . . . . . . CP 17 0.000 2.095 0.001 17 12 . 6 . . . . . 29 35 . . . . . . . . . . . . . CR 8 0.000 1.299 0.000 . 62 . . . . 12 . . 25 . . . . . . . . . . . . . . CS 9 0.000 0.764 0.000 . . . . . . 77 . . . . 22 . . . . . . . . . . . . CT 15 0.000 1.857 0.001 . 7 . . . . 53 . . 13 . 20 . . . . . 7 . . . . . . C_ 7 0.000 2.128 0.000 . 42 14 . . 14 . . . 14 14 . . . . . . . . . . . . . D* 2 0.000 1.000 0.000 50 . 50 . . . . . . . . . . . . . . . . . . . . . D8 2 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . DA 742 0.021 1.221 0.025 4 2 77 3 . 12 . . . 1 . . . . . . . . . . . . . . DC 873 0.025 1.392 0.034 4 2 72 4 1 16 . . . 1 . . . . . . . . . . . . . . DE 11 0.000 0.946 0.000 . . 63 . . 36 . . . . . . . . . . . . . . . . . . DG 169 0.005 1.762 0.008 . 28 51 4 . 13 . 1 . 1 2 . . . . . . . . . . . . . DI 3 0.000 0.918 0.000 66 33 . . . . . . . . . . . . . . . . . . . . . . DM 1 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . DO 80 0.002 1.624 0.004 20 . 62 5 . 9 . . 2 . . . 1 . . . . . . . . . . . DS 29 0.001 1.734 0.001 17 . 58 10 3 10 . . . . . . . . . . . . . . . . . . DT 69 0.002 1.628 0.003 11 4 65 4 . 13 . . . . 1 . . . . . . . . . . . . . DZ 199 0.006 2.148 0.012 3 34 6 1 . . . . . 33 19 . . . . . . . . . . . . . D_ 12 0.000 1.784 0.001 17 41 33 . . . . 8 . . . . . . . . . . . . . . . . E2 16 0.000 2.049 0.001 25 . 31 . . . . 31 . 6 . . . . . 6 . . . . . . . . E4 9 0.000 1.352 0.000 . . 55 . 11 . . 33 . . . . . . . . . . . . . . . . E8 87 0.002 1.759 0.004 20 . 50 1 . 1 . 24 . 1 1 . . . . . . . . . . . . . EA 55 0.002 1.940 0.003 20 . 54 4 . 2 2 14 . 2 . . . . . . . . 2 . . . . . EC 11 0.000 0.684 0.000 18 . 81 . . . . . . . . . . . . . . . . . . . . . ED 272 0.008 0.996 0.008 16 . 78 1 . . . 3 . . . . . . . . . . . . . . . . EE 8 0.000 0.811 0.000 25 . 74 . . . . . . . . . . . . . . . . . . . . . EF 6 0.000 0.918 0.000 33 . 66 . . . . . . . . . . . . . . . . . . . . . 
EG 106 0.003 1.459 0.004 8 . 53 3 . . . 35 . . . . . . . . . . . . . . . . EH 40 0.001 1.331 0.001 17 . 67 2 . . . 12 . . . . . . . . . . . . . . . . EK 2 0.000 1.000 0.000 . . . 50 . . . 50 . . . . . . . . . . . . . . . . EO 175 0.005 1.783 0.009 36 . 41 2 . 1 1 18 . . . 1 . . . . . . . . . . . . EP 11 0.000 1.241 0.000 27 . 63 . . . . . . . . . . . . . . . 9 . . . . . ER 13 0.000 2.038 0.001 30 . 38 . . . 8 15 . 8 . . . . . . . . . . . . . . ES 191 0.005 1.635 0.009 30 1 52 1 2 . . 13 . . . . 1 . . . . . . . . . . . ET 323 0.009 1.749 0.016 36 . 48 4 . 1 1 9 . 1 . . . . . . . . . . . . . . E_ 1028 0.029 1.355 0.039 1 . 58 . . . 1 36 . 1 . . . . . . . . 1 . . . . . FA 2 0.000 1.000 0.000 . . 50 . . . . . . 50 . . . . . . . . . . . . . . FC 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . FO 3 0.000 1.585 0.000 . . 33 . . 33 . . 33 . . . . . . . . . . . . . . . FS 2 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . FT 23 0.001 2.017 0.001 26 4 39 9 . 22 . . . . . . . . . . . . . . . . . . FZ 4 0.000 1.000 0.000 50 . . . . . . . . 50 . . . . . . . . . . . . . . F_ 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . G2 16 0.000 2.475 0.001 6 6 . . 43 . . . . 6 . 6 . . . 6 19 . . . 6 . . . G4 91 0.003 1.484 0.004 2 24 . . 64 2 1 . . . . 1 . . . . 4 . . . . . . . G8 35 0.001 2.921 0.003 23 11 . . 25 3 6 . . 3 3 8 6 . . . 11 . . . . . . . GA 3 0.000 1.585 0.000 33 33 . . 33 . . . . . . . . . . . . . . . . . . . GC 2 0.000 1.000 0.000 50 . . . 50 . . . . . . . . . . . . . . . . . . . GD 75 0.002 2.030 0.004 53 12 3 . 22 1 1 . 1 . 3 . . . . . 1 1 . . . . . . GE 36 0.001 2.099 0.002 8 22 . . 50 . 6 . . . . 8 3 . . . 3 . . . . . . . GF 2 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . GG 1 0.000 0.000 0.000 . . . . 99 . . . . . . . . . . . . . . . . . . . GH 59 0.002 2.390 0.004 44 15 . 2 23 2 2 . 2 2 2 3 2 . 2 . . . . . . . . . GI 1 0.000 0.000 0.000 . . . . 99 . . . . . . . . . . . . . . . . . . . 
GK 1 0.000 0.000 0.000 . . . . 99 . . . . . . . . . . . . . . . . . . . GO 54 0.002 2.334 0.004 6 18 2 . 50 2 4 . . 2 2 6 . . . . 9 . . . . . . . GP 3 0.000 0.918 0.000 . 33 . . 66 . . . . . . . . . . . . . . . . . . . GR 22 0.001 2.184 0.001 9 18 . . 50 5 9 . 5 . . 5 . . . . . . . . . . . . GS 37 0.001 1.572 0.002 62 . 5 . 24 . . . . . 3 3 3 . . . . . . . . . . . GT 31 0.001 1.641 0.001 54 . 3 . 32 3 3 . . . . 3 . . . . . . . . . . . . G_ 3310 0.093 2.008 0.187 . 23 . . 56 3 5 . . 2 1 2 1 . . . 6 . . . . . . . H* 2 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . H2 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . H8 3 0.000 0.918 0.000 . . 66 . . 33 . . . . . . . . . . . . . . . . . . HA 259 0.007 1.364 0.010 10 2 74 7 . 5 . . . 1 . . . . . . . . . . . . . . HC 343 0.010 1.469 0.014 12 2 70 8 . 5 . . . 1 . . . . . . . . . . . . . . HD 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . HE 3 0.000 0.918 0.000 . . 66 . . 33 . . . . . . . . . . . . . . . . . . HG 88 0.002 1.679 0.004 3 29 56 3 . 1 . . . 3 2 . . . . . . . . . . . . . HI 4 0.000 0.811 0.000 25 . 74 . . . . . . . . . . . . . . . . . . . . . HO 58 0.002 1.738 0.003 43 . 43 3 . . . 2 . 2 5 . . . . 2 . . . . . . . . HS 25 0.001 1.406 0.001 28 . 55 . . 16 . . . . . . . . . . . . . . . . . . HT 61 0.002 1.667 0.003 39 . 44 11 . 3 . . . . . . . . . 2 . . . . . . . . HZ 121 0.003 2.475 0.008 13 32 6 . . . . 2 4 25 16 . 1 . . 1 . . . . . . . . H_ 4 0.000 0.811 0.000 . . 74 . . 25 . . . . . . . . . . . . . . . . . . IE 11 0.000 2.118 0.001 9 18 . 9 . . . 36 . . . . . . . . . . 27 . . . . . II 71 0.002 2.374 0.005 . 7 10 . 1 . 3 29 . . . 6 1 . . 1 . . 39 . . . . 1 IK 4 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . 99 . . . . . IL 26 0.001 0.000 0.000 . . . . . . . . . . . . . . . . . . 99 . . . . . IR 43 0.001 1.579 0.002 . 9 5 . . . 2 60 . . . . . . . . . . 23 . . . . . KE 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . 
KO 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . K_ 55 0.002 1.161 0.002 . . 13 2 . 2 . 76 . . . . . . . . . . 7 . . . . . LE 3 0.000 1.585 0.000 33 33 . . 33 . . . . . . . . . . . . . . . . . . . LG 1 0.000 0.000 0.000 . 99 . . . . . . . . . . . . . . . . . . . . . . LO 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . L_ 39 0.001 1.049 0.001 . . 3 . . . . 30 . . . . . . . . . . 66 . . . . . M2 2 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . M4 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . M8 2 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . MA 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . MG 6 0.000 0.650 0.000 . . 17 . . . . 83 . . . . . . . . . . . . . . . . MO 11 0.000 0.684 0.000 . . 18 . . . . 81 . . . . . . . . . . . . . . . . MS 6 0.000 0.650 0.000 . 17 . . . . . 83 . . . . . . . . . . . . . . . . MT 4 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . M_ 389 0.011 0.341 0.004 . 2 3 . . . . 94 . . . . . . . . . . . . . . . . N2 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . N4 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . N8 5 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . NA 2 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . NG 2 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . NO 15 0.000 0.353 0.000 . . 7 . . . . 92 . . . . . . . . . . . . . . . . NS 6 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . NT 5 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . N_ 446 0.013 0.126 0.002 . . 1 . . . . 97 . . . . . . . . . . . . . . . . O* 2 0.000 1.000 0.000 50 . . . . . . . . . . . . . . . . 50 . . . . . . O2 7 0.000 2.522 0.000 28 . . 14 . 14 14 14 . 14 . . . . . . . . . . . . . . O4 8 0.000 1.061 0.000 74 12 . . . . . . 12 . . . . . . . . . . . . . . . O6 2 0.000 0.000 0.000 . . . . . 
99 . . . . . . . . . . . . . . . . . . O8 32 0.001 2.797 0.003 28 3 . 3 . 6 9 3 31 6 . . 3 . . 3 3 . . . . . . . OA 3 0.000 0.000 0.000 . . . . . . . . 99 . . . . . . . . . . . . . . . OC 19 0.001 1.999 0.001 16 . . . . 16 5 . 52 . . . . . 5 . . 5 . . . . . . OD 1435 0.040 1.087 0.044 21 . . 1 . 1 . . 75 . . . . . . . . . . . . . . . OE 1344 0.038 3.054 0.115 39 10 . 1 4 5 4 . 14 2 2 3 4 1 . 6 . 3 . . . . . . OF 13 0.000 1.614 0.001 46 . . 8 . . . . 38 . . . . . . 8 . . . . . . . . OG 14 0.000 2.807 0.001 14 28 . 7 . . 14 7 7 14 . . . . . . 7 . . . . . . . OH 570 0.016 1.393 0.022 47 . . 2 . . . . 47 . . . 1 . . . . . . . . . . . OI 9 0.000 1.975 0.000 22 . . . . 33 . . . . . . 22 . . 22 . . . . . . . . OK 7 0.000 1.149 0.000 . . . . . 71 . . . . . . 14 . . . . . . . 14 . . . OL 1 0.000 0.000 0.000 . . . . . . . . . . . 99 . . . . . . . . . . . . OM 14 0.000 2.503 0.001 21 . . . . 14 14 . . . . . 21 . . 21 . 7 . . . . . . ON 7 0.000 2.128 0.000 42 . . . . 14 . . 14 . . . 14 . . 14 . . . . . . . . OO 4 0.000 0.811 0.000 25 . . . . . . . 74 . . . . . . . . . . . . . . . OP 68 0.002 1.227 0.002 61 . . . . 1 1 . 33 . 1 . . . . . . . . . . . . . OR 302 0.008 3.150 0.027 31 10 . 2 5 19 5 . 5 3 1 4 6 . . 6 1 2 . . . . . . OS 9 0.000 1.224 0.000 66 . . . . . . . 22 . . . . . . . . 11 . . . . . . OT 8 0.000 1.406 0.000 37 . . . . 12 . . 50 . . . . . . . . . . . . . . . O_ 31 0.001 2.735 0.002 10 . . . . 35 3 3 26 . 3 . 6 3 . . 3 . . 3 . . . 3 P8 3 0.000 0.918 0.000 66 . 33 . . . . . . . . . . . . . . . . . . . . . PA 14 0.000 1.689 0.001 28 14 50 7 . . . . . . . . . . . . . . . . . . . . PC 3 0.000 1.585 0.000 . . 33 . . 33 . . . . . . 33 . . . . . . . . . . . PG 6 0.000 1.792 0.000 17 17 50 . . 17 . . . . . . . . . . . . . . . . . . PO 50 0.001 1.305 0.002 75 4 12 2 . 2 . 2 2 . . . . . . . . . . . . . . . PS 25 0.001 1.713 0.001 40 4 44 4 . . . . . 8 . . . . . . . . . . . . . . PT 91 0.003 1.936 0.005 39 7 40 . . 8 . . 1 2 . . 1 . . 1 . . . . . . . . 
PZ 21 0.001 1.933 0.001 33 19 5 . . . . 5 . 38 . . . . . . . . . . . . . . P_ 3 0.000 1.585 0.000 . 33 33 . . 33 . . . . . . . . . . . . . . . . . . R* 1 0.000 0.000 0.000 . . . . . . . . . . . . . . . . . . 99 . . . . . R2 1 0.000 0.000 0.000 . . . . . . . 99 . . . . . . . . . . . . . . . . R4 3 0.000 0.918 0.000 . . 33 . . . . 66 . . . . . . . . . . . . . . . . R6 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . R8 6 0.000 1.459 0.000 . . . 17 . . . 50 . 33 . . . . . . . . . . . . . . RA 96 0.003 1.783 0.005 46 . 31 1 . 1 . 15 . . . . . . . . . . 4 . . . . . RC 4 0.000 0.000 0.000 . . 99 . . . . . . . . . . . . . . . . . . . . . RD 2 0.000 1.000 0.000 . . 50 . . . . 50 . . . . . . . . . . . . . . . . RE 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . RG 45 0.001 1.997 0.003 7 2 31 9 . . . 44 . . . . . . . . . . 7 . . . . . RH 2 0.000 1.000 0.000 50 . . . . . . 50 . . . . . . . . . . . . . . . . RI 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . RO 87 0.002 2.046 0.005 31 . 32 7 1 1 . 26 . . . . . . . . . . 1 . . . . . RP 2 0.000 1.000 0.000 . . . 50 . . . 50 . . . . . . . . . . . . . . . . RS 32 0.001 1.660 0.001 43 . 34 3 . . . 19 . . . . . . . . . . . . . . . . RT 42 0.001 1.898 0.002 50 . 21 5 . 2 . 19 . . . . . . . . . . 2 . . . . . R_ 589 0.017 1.654 0.027 2 1 34 1 . 2 . 53 . . . . . . . . . . 6 . . . . . S2 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . S8 40 0.001 2.572 0.003 17 2 . . 5 40 10 . . . . 12 2 . . . . 7 . . . 2 . . SA 21 0.001 1.280 0.001 71 . . 5 . 14 . . . . . . . . . . . 9 . . . . . . SC 868 0.024 2.072 0.050 60 . 1 3 4 18 3 . . . . 2 4 1 1 1 . 2 . . . . . . SD 45 0.001 0.882 0.001 84 . . 9 . 2 . . . . . . . . 2 2 . . . . . . . . SE 5 0.000 1.371 0.000 59 . . 20 . 20 . . . . . . . . . . . . . . . . . . SG 28 0.001 2.547 0.002 42 11 4 . . 21 4 4 . 4 . 4 . . . . . . . . . 4 . 4 SH 25 0.001 0.529 0.000 87 . . . . 12 . . . . . . . . . . . . . . . . . . 
SO 37 0.001 1.595 0.002 70 3 . 3 8 8 . . . . . . . . . 3 . 5 . . . . . . SR 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . ST 3 0.000 1.585 0.000 33 . . 33 . . . . . . . . . . . . . 33 . . . . . . S_ 4 0.000 1.500 0.000 50 25 . . . 25 . . . . . . . . . . . . . . . . . . T* 2 0.000 1.000 0.000 50 . . . . . . . . . . 50 . . . . . . . . . . . . T2 6 0.000 1.918 0.000 17 . . . . 33 17 . . . . . 33 . . . . . . . . . . . T8 96 0.003 2.724 0.007 25 3 . 1 3 23 23 . . . 1 10 . . . 1 . 7 . . . 2 . . TA 26 0.001 1.956 0.001 57 . 4 4 . 11 . . . . . 15 . . . . . 4 . . . 4 . . TC 1054 0.030 2.328 0.069 49 . 1 3 2 24 3 . . . . 3 3 . . 1 . 7 . . . 2 . . TD 83 0.002 0.926 0.002 86 1 . 1 1 5 . . . . . . 1 1 1 1 . . . . . . . . TE 13 0.000 1.489 0.001 61 . . . . 23 . . . . . 8 . 8 . . . . . . . . . . TF 3 0.000 0.918 0.000 66 . . . . . . . . . . . . . . . . 33 . . . . . . TG 62 0.002 2.936 0.005 10 8 2 . 6 30 18 . . . . 11 3 . . 2 2 8 . . . . . . TH 39 0.001 1.142 0.001 79 . . . . 10 . . . 3 . 3 . . 3 3 . . . . . . . . TO 49 0.001 2.156 0.003 53 2 . . 8 10 2 . . . . 8 . . . . . 14 . . . 2 . . TP 12 0.000 0.811 0.000 74 . . . . 25 . . . . . . . . . . . . . . . . . . TR 4 0.000 1.500 0.000 50 . . . . . . . . . . 25 25 . . . . . . . . . . . TS 2 0.000 1.000 0.000 50 . . . . 50 . . . . . . . . . . . . . . . . . . TT 1 0.000 0.000 0.000 99 . . . . . . . . . . . . . . . . . . . . . . . T_ 1 0.000 0.000 0.000 . . . . . 99 . . . . . . . . . . . . . . . . . . Z2 1 0.000 0.000 0.000 . . . . . . . . . . . 99 . . . . . . . . . . . . Z8 21 0.001 1.750 0.001 . . . . . . 47 . . . . 28 . . . . . 14 . . . 9 . . ZA 9 0.000 1.436 0.000 . . . . . . 55 . . . . 22 . . . . . 22 . . . . . . ZC 96 0.003 1.438 0.004 . . . . . . 55 . . . . 33 . . . . . 9 . . . 2 . . ZG 204 0.006 1.095 0.006 . . . . . . 61 . . . . 36 . . . . . 2 . . . . . . ZO 10 0.000 1.485 0.000 . . . . . . 30 . . . . 50 . . . . . 20 . . . . . . ZT 2 0.000 0.000 0.000 . . . . . . 99 . . . . . . . . . . . . . . . . . 
Z_ 2 0.000 1.000 0.000 . . . . . . 50 . . . . 50 . . . . . . . . . . . . _* 10 0.000 1.361 0.000 . . . 50 . 10 . . . . . . 40 . . . . . . . . . . . _2 265 0.007 2.067 0.015 . . 1 56 1 20 . . . . . . 3 3 4 2 . . . 7 1 . 1 . _4 1560 0.044 1.137 0.050 . . . 81 1 8 . . . . . . 3 1 1 . . . . 1 . . . . _6 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . _8 492 0.014 1.907 0.026 . . . 57 . 22 . . . . . . 4 6 5 . . . . 2 . . . . _A 128 0.004 2.196 0.008 . . . 14 . 14 2 . . . . . 46 7 2 14 . . . . . . . . _C 23 0.001 2.315 0.001 . . . 26 . 39 9 . . . . . 4 13 4 . . 4 . . . . . . _D 102 0.003 1.814 0.005 1 . 1 46 . 39 . 1 2 . . . 4 . 5 . . . . . . . . 1 _E 350 0.010 1.028 0.010 . . 1 81 2 11 . . . . . . 2 1 1 . . . . . . . . . _F 7 0.000 2.128 0.000 . . . . 14 42 . . 14 . . . 14 . 14 . . . . . . . . . _G 134 0.004 2.336 0.009 . 1 1 51 3 15 1 . . . . . 11 7 6 1 . . . 2 1 . . . _H 145 0.004 1.764 0.007 . . 1 63 1 21 . . 1 . . . 3 3 1 3 . . . 1 1 . 1 . _I 1 0.000 0.000 0.000 . . . . . . . . . . . . . . 99 . . . . . . . . . _L 2 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . _O 1279 0.036 2.486 0.089 . . 1 41 2 16 . . . . . . 15 12 9 2 . . . 1 1 . . . _P 98 0.003 1.799 0.005 . . . 60 . 16 . . . . . . 13 3 4 1 . . . 1 1 . . . _R 125 0.004 1.036 0.004 . 1 1 81 . 12 1 . . . . . 2 1 1 . . . . . . . . 1 _S 650 0.018 2.591 0.047 . . . 25 1 24 . . . . . . 19 14 12 1 . . . . 1 . . . _T 723 0.020 2.527 0.051 . . . 29 1 25 . . . . . . 12 15 15 1 . . . . 2 . . . __ 1 0.000 0.000 0.000 . . . 99 . . . . . . . . . . . . . . . . . . . . ----- ----- ----- ----- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- TOT 35620 1.000 1.767 1.767 17 12 11 11 8 7 6 6 5 4 3 3 3 1 1 1 1 1 0 0 0 0 0 0 The only significant digraphs that can be taken as sign of word start are "2." and "4.". The number of new breaks implied by each is shown below: 2. 107 (61 before 2_, 19 before 2A) 4. 
116 (112 before 4O; all ok) The following digraphs are practically never word-initial: C. except CP DG E_ G_ I. L. M. N. R_ Z. OK, let's convert the text. We will not try to correct the word breaks any more, because the corrections that are not negligible are dangerous. --- fsg2glp ------------------------ #! /n/gnu/bin/sed -f # Splits raw FSG into "glyphs" # # Preliminary corrections: # s/$/_/g s/^/_/g s/ /_/g s/CI/A/g s/IIIL/M/g s/CM/AN/g s/AL/AN/g s/A_/G_/g s/A2/AR/g s/AO/AE/g s/AI+E/AE/g s/AII+R/AIR/g s/4A/4O/g s/4CD/4OD/g s/4CH/4OH/g s/4G/4O/g # # Line, paragraph, and word breaks. # s@//@(//)@g s@=@(=)@g s@__*@(_)@g # # Glyph splitting # s/AK/(ak)/g s/AM/(am)/g s/AN/(an)/g s/AR/(ar)/g s/AIR/(air\?)/g s/AE/(ae)/g # Any remaining "A"s seem to be misreadings of "O" s/A/O/g # s/8G/(bg)/g s/G/(g)/g # s/HZ/(hz)/g s/DZ/(dz)/g # s/PZ/(pz)/g s/FZ/(fz)/g # s/z)CCC/z)(ccc\?)/g s/z)CC/z)(cc\?)/g s/z)C/zc)/g # s/H/(h)/g s/D/(d)/g # s/P/(p)/g s/F/(f)/g # s/h)CCC/h)(ccc\?)/g s/h)CC/hcc)/g s/h)C/hc)/g s/d)CCC/d)(ccc\?)/g s/d)CC/dcc)/g s/d)C/dc)/g # s/4OE/(qoe)/g s/OE/(oe)/g s/4OR/(qor)/g s/OR/(or)/g s/4O/(qo)/g s/O/(o)/g # s/SCCC+/(sccc\?)/g s/SCC/(scc)/g s/SC/(sc)/g s/S/(s)/g s/TCCC+/(tccc\?)/g s/TCC/(tcc)/g s/TC/(tc)/g s/T/(t)/g s/CCC/(ccc\?)/g s/CC/(cc\?)/g s/C/(c\?)/g # s/R/(r)/g s/2/(z)/g s/8/(b)/g s/E/(e)/g # Remaining uppercase letters should be checked manually s/\([-A-Z0-9*][-A-Z0-9*]*\)/(\1\?)/g ------------------------------------ OK. 
Let's run it: cat .tmp-c-fsg.txt \ | /n/gnu/bin/sed \ -e 's/^ *//g' -e 's/ *$//g' -e 's/ */ /g' \ > .voyn.fsg cat .voyn.fsg \ | fsg2glp \ > .voyn.glp Sample of .voyn.glp: (_)(f)(tc)(bg)(d)(ar)(g)(_)(o)(dcc)(g)(_)(qo)(d)(ar)(_)(s)(g)(d)(tc)(bg)(_)(qo)(d)(ar)(_)(sc)(bg)(_)(//)(_) (_)(b)(an)(_)(sc)(g)(_)(e)(g)(_)(z)(sc)(oe)(_)(qoe)(tc)(bg)(_)(tc)(bg)(d)(ar)(_)(tc)(dcc)(bg)(_)(r)(ar)(_)(//)(_) (_)(qo)(d)(an)(_)(t)(ak)(_)(o)(d)(tc)(g)(_)(4?)(c?)(g)(_)(d)(an)(_)(scc)(d)(g)(_)(e)(h)(an)(_)(oe)(d)(ar)(_)(or)(_)(//)(_) (_)(b)(o)(dz)(g)(_)(e)(d)(ak)(o)(_)(g)(dcc)(g)(_)(e)(sc)(g)(_)(d)(ae)(_)(bg)(_)(sc)(g)(_)(o)(d)(_)(sc)(g)(_)(qo)(dcc)(bg)(_)(//)(_) (_)(sc)(g)(d)(ar)(_)(sc)(g)(_)(dzc)(g)(_)(r)(_)(an)(_)(oe)(_)(oe)(scc)(bg)(_)(qor)(cc?)(g)(_)(qo)(d)(g)(_)(//)(_) (_)(p)(tc)(g)(dcc)(ar)(_)(oe)(d)(g)(_)(b)(ar)(_)(o)(dc)(g)(_)(qo)(d)(an)(_)(t)(hz)(g)(_)(qo)(dcc)(bg)(_)(qo)(d)(g)(_)(//)(_) (_)(p)(tc)(bg)(_)(qo)(d)(s)(bg)(_)(g)(h)(an)(_)(tc)(bg)(_)(qo)(d)(ar)(_)(t)(g)(_)(e)(oe)(_)(tc)(bg)(_)(qo)(d)(g)(_)(//)(_) (_)(z)(ar)(tc)(g)(_)(qo)(h)(ar)(bg)(_)(b)(sc)(dz)(g)(_)(qo)(d)(an)(_)(t)(dz)(g)(_)(e)(sc)(bg)(_)(o)(dcc)(bg)(_)(//)(_) (_)(qo)(d)(t)(bg)(_)(tc)(hc)(g)(_)(e)(o)(_)(qo)(dc)(bg)(_)(qo)(d)(an)(_)(tcc)(dc)(g)(_)(qo)(d)(o)(p)(_)(oe)(tc)(bg)(_)(z)(ae)(_)(//)(_) (_)(b)(s)(or)(_)(qo)(h)(ar)(_)(t)(bg)(_)(sc)(g)(_)(qo)(d)(an)(_)(c?)(*?)(dz)(g)(bg)(_)(o)(hc)(g)(_)(hc)(bg)(_)(e)(tc)(bg)(_)(//)(_) These are the most popular "words" in this parsing, using the original word breaks: cat .voyn.glp \ | sed \ -e 's:([/ _=]*):@:g' \ | tr '@' '\n' \ | egrep '.' 
\ | sort | uniq -c | expand \ | sort +0.0 -0.7nr \ | compute-freqs \ > .voyn-m.frq 213 0.033 (sc)(bg) 194 0.030 (tc)(bg) 178 0.028 (oe) 161 0.025 (qo)(dc)(bg) 157 0.024 (qo)(d)(an) 152 0.024 (qo)(dcc)(bg) 107 0.017 (qo)(d)(ae) 88 0.014 (qoe) 87 0.013 (qo)(d)(am) 86 0.013 (qo)(dcc)(g) 78 0.012 (b)(am) 78 0.012 (sc)(g) 75 0.012 (tc)(g) 58 0.009 (b)(ar) 58 0.009 (qo)(d)(g) 57 0.009 (or) 56 0.009 (e)(tc)(bg) 53 0.008 (b)(ae) 50 0.008 (o)(hc)(bg) 48 0.007 (qo)(d)(ar) 47 0.007 (qo)(hc)(bg) 46 0.007 (bg) 45 0.007 (b)(an) 44 0.007 (o)(dc)(bg) 41 0.006 (o)(d)(an) 41 0.006 (qo)(dc)(g) 40 0.006 (qo)(hcc)(bg) 35 0.005 (t)(dz)(g) 35 0.005 (z)(am) 34 0.005 (o)(d)(am) 34 0.005 (tcc)(g) 33 0.005 (o)(dcc)(bg) 33 0.005 (scc)(bg) 31 0.005 (scc)(g) 30 0.005 (am) 30 0.005 (z)(oe) Here are the glyph frequencies (ignoring glyphs with "*"): cat .voyn.glp \ | sed \ -e 's:([/ _=]*):@:g' \ -e 's/)(/)@(/g' \ | tr '@' '\n' \ | egrep '.' \ | classify-glyphs \ | sort | uniq -c | expand \ | sort +0.8 -0.99 \ | compute-freqs \ > .voyn-g.frq 30 0.002 ?:(4?) 12 0.001 ?:(6?) 10 0.001 ?:(I?) 10 0.001 ?:(II?) 2 0.000 ?:(IIIK?) 2 0.000 ?:(IIK?) 2 0.000 ?:(IIL?) 10 0.001 ?:(K?) 6 0.000 ?:(L?) 23 0.001 ?:(M?) 7 0.000 ?:(N?) 28 0.002 ?:(air?) 38 0.002 ?:(c?) 48 0.003 ?:(cc?) 29 0.002 ?:(ccc?) 2055 0.117 g:(bg) 1726 0.098 g:(g) 885 0.050 o:(o) 1436 0.082 o:(qo) 1152 0.065 o:(oe) 193 0.011 o:(qoe) 286 0.016 o:(or) 17 0.001 o:(qor) 557 0.032 a:(ae) 43 0.002 a:(ak) 414 0.024 a:(am) 495 0.028 a:(an) 405 0.023 a:(ar) 685 0.039 r:(b) 456 0.026 r:(e) 186 0.011 r:(r) 365 0.021 r:(z) 147 0.008 D:(dz) 52 0.003 D:(dzc) 91 0.005 H:(hz) 30 0.002 H:(hzc) 3 0.000 F:(fz) 1 0.000 F:(fzc) 12 0.001 P:(pz) 9 0.001 P:(pzc) 1141 0.065 d:(d) 412 0.023 d:(dc) 440 0.025 d:(dcc) 513 0.029 h:(h) 210 0.012 h:(hc) 129 0.007 h:(hcc) 32 0.002 f:(f) 195 0.011 p:(p) 212 0.012 s:(s) 716 0.041 s:(sc) 150 0.009 s:(scc) 400 0.023 t:(t) 927 0.053 t:(tc) 126 0.007 t:(tcc)