#! /bin/bash
# Last edited on 2023-05-14 19:47:46 by stolfi
#
# Generates a set of Zipf plots comparing Voynichese,
# Gruggish, and various other languages.
#
# Every plot is produced by one call to the external script
# "compare-freq-vs-rank-distribs.sh", which must be on the PATH.
# The name of each plot is echoed to stdout before it is generated;
# section banners and diagnostics go to stderr.
#
# Options:
#   -format {FORMAT}  value forwarded as "-format" to the plot script
#                     (default "svg").
#   -show {SHOW}      value forwarded as "-show" to the plot script
#                     (default 0).
#
# Exit status: 1 on a command line error; otherwise the script stops at
# the first failing command (see "set -e" below).

cmd=${0##*/}
usage="${cmd} [-format {FORMAT}] [-show {SHOW}]"

# Exit on first error/abort, and on use of an unset variable:
set -eu

format="svg"
show=0

# Prints the message given in "$1" and the usage line to stderr, then exits
# with status 1.
usage_error() {
  echo "$1" 1>&2
  echo "usage: ${usage}" 1>&2
  exit 1
}

# Parse the leading options. The loop stops at the first argument that does
# not begin with "-"; any such leftover argument is rejected further down.
while [[ $# -gt 0 && "$1" == -* ]]; do
  case "$1" in
    -show)
      [[ $# -ge 2 ]] || usage_error "bad option \"$1\": missing value"
      show="$2"
      shift 2
      ;;
    -format)
      [[ $# -ge 2 ]] || usage_error "bad option \"$1\": missing value"
      format="$2"
      shift 2
      ;;
    *)
      usage_error "bad option \"$1\""
      ;;
  esac
done

# This script takes no positional arguments.
if [[ $# -ne 0 ]]; then
  echo "usage: ${usage}" 1>&2
  exit 1
fi

# Generates one plot.
# Globals:   format, show (read)
# Arguments: $1     - output name; echoed to stdout and passed as the
#                     last argument of the plot script
#            $2...  - alternating {DATASET} {LABEL} pairs, forwarded
#                     unchanged (the label is presumably the plot key
#                     text -- confirm against the plot script)
# Returns:   the exit status of compare-freq-vs-rank-distribs.sh
make_plot() {
  local oname="$1"
  shift
  echo "${oname}"
  compare-freq-vs-rank-distribs.sh \
    -color -format "${format}" -show "${show}" \
    "$@" \
    "${oname}"
}

xtra=0 # If 1, creates the test plots only, else the normal plots only.

# Every normal plot group is enabled exactly when the test plots are not.
norm=$(( 1 - xtra ))

voyn=${norm}
bibl=${norm}
euro=${norm}
semi=${norm}
asia=${norm}
synt=${norm}

if (( xtra != 0 )); then

  echo "### New plots ###" 1>&2

  # Test of line colors etc:
  make_plot "zipf-test-0" \
    voyn/prs/hea.2 "0 VMS Herbal A 2" \
    voyn/prs/heb.2 "1 VMS Herbal B 2" \
    voyn/prs/zod.1 "2 VMS Zodiac 1 - Prose" \
    voyn/prs/str.1 "3 VMS Stars 1 - Prose" \
    voyn/prs/tot.1 "4 VMS prose" \
    voyn/lab/tot.1 "5 VMS labels" \
    voyn/prs/cos.1 "6 VMS Cosmo 1 - Prose" \
    voyn/prs/unk.1 "7 VMS Unknown 1 - Prose" \
    voyn/prs/unk.2 "8 VMS Unknown 2 - Prose" \
    voyn/prs/unk.4 "9 VMS Unknown 4 - Prose"

  # Testing key placement (short names):
  make_plot "zipf-test-1" \
    voyn/prs/cos.1 "VMS Cosmo 1 - Prose" \
    voyn/lab/tot.1 "VMS labels"

  # Testing key placement (long names):
  make_plot "zipf-test-2" \
    voyn/prs/cos.1 "VMoynich MS Cosmological section 1 - Prose" \
    voyn/lab/tot.1 "VMoynich MS labels"

  # Test of timing (5k distinct words):
  make_plot "zipf-test-time-1" \
    engl/wow/tot.1 "Wells War of Worlds"

  # Test of timing (10 k distinct words):
  make_plot "zipf-test-time-2" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds"

  # Test of timing (20 k distinct words):
  make_plot "zipf-test-time-3" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds"

  # Test of timing (40 k distinct words):
  make_plot "zipf-test-time-4" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds" \
    engl/wow/tot.1 "Wells War of Worlds"

fi

if (( bibl != 0 )); then

  echo "### Bible plots ###" 1>&2

  # Comparison of the five books of the Vulgate Pentateuch:
  make_plot "zipf-laot-0" \
    latn/ptt/gen.1 "Vulgate Genesis" \
    latn/ptt/exo.1 "Vulgate Exodus" \
    latn/ptt/num.1 "Vulgate Numeri" \
    latn/ptt/lev.1 "Vulgate Leviticus" \
    latn/ptt/deu.1 "Vulgate Deuteronomium"

  # Comparison of the five books of the Hebrew Pentateuch (Tanakh):
  make_plot "zipf-heot-0" \
    hebr/tav/gen.1 "Tanakh Genesis" \
    hebr/tav/exo.1 "Tanakh Exodus" \
    hebr/tav/num.1 "Tanakh Numeri" \
    hebr/tav/lev.1 "Tanakh Leviticus" \
    hebr/tav/deu.1 "Tanakh Deuteronomium"

  # Comparison of the four Gospels of the Vulgate New Testament:
  make_plot "zipf-lant-0" \
    latn/nwt/mat.1 "Vulgate Matthew" \
    latn/nwt/mrk.1 "Vulgate Mark" \
    latn/nwt/luk.1 "Vulgate Luke" \
    latn/nwt/joh.1 "Vulgate John"

fi

if (( voyn != 0 )); then

  echo "### Voynichese ###" 1>&2

  # Voynichese prose and labels:
  make_plot "zipf-voyn-0" \
    voyn/prs/tot.1 "VMS prose" \
    voyn/lab/tot.1 "VMS labels"

  # Voynichese Herbal B, biology, and Stars 2:
  make_plot "zipf-voyn-1" \
    voyn/prs/bio.1 "VMS Biology" \
    voyn/prs/heb.1 "VMS Herbal B 1" \
    voyn/prs/str.2 "VMS Stars 2 - Prose"

  # Voynichese Herbal A and Pharma 1, 2:
  make_plot "zipf-voyn-2" \
    voyn/prs/hea.1 "VMS Herbal A 1" \
    voyn/prs/pha.1 "VMS Pharma 1 - Prose" \
    voyn/prs/pha.2 "VMS Pharma 2 - Prose"

  # Voynichese Herbal A and B:
  make_plot "zipf-voyn-3" \
    voyn/prs/hea.1 "VMS Herbal A 1" \
    voyn/prs/heb.1 "VMS Herbal B 1"

  # Voynichese Cosmo 2, 3:
  make_plot "zipf-voyn-4" \
    voyn/prs/cos.2 "VMS Cosmo 2 - Prose" \
    voyn/prs/cos.3 "VMS Cosmo 3 - Prose"

  # Voynichese Cosmo 1 prose:
  make_plot "zipf-voyn-5" \
    voyn/prs/cos.1 "VMS Cosmo 1 - Prose"

  # Voynichese Miscellanea:
  make_plot "zipf-voyn-6" \
    voyn/prs/hea.2 "VMS Herbal A 2" \
    voyn/prs/heb.2 "VMS Herbal B 2" \
    voyn/prs/zod.1 "VMS Zodiac 1 - Prose" \
    voyn/prs/str.1 "VMS Stars 1 - Prose"

  # Voynichese Unknown sections 1,2,4-7:
  make_plot "zipf-voyn-7" \
    voyn/prs/unk.1 "VMS Unknown 1 - Prose" \
    voyn/prs/unk.2 "VMS Unknown 2 - Prose" \
    voyn/prs/unk.4 "VMS Unknown 4 - Prose" \
    voyn/prs/unk.5 "VMS Unknown 5 - Prose" \
    voyn/prs/unk.6 "VMS Unknown 6 - Prose" \
    voyn/prs/unk.7 "VMS Unknown 7 - Prose"

  # Voynichese Unknown section 3:
  make_plot "zipf-voyn-8" \
    voyn/prs/unk.3 "VMS Unknown 3 - Prose"

  # Voynichese Herbal A and German:
  make_plot "zipf-voyn-hea-euro-2" \
    voyn/prs/hea.1 "VMS Herbal A 1" \
    germ/sim/tot.1 "German - Simplicissimus"

  # Voynichese Herbal A and English:
  make_plot "zipf-voyn-hea-engl-1" \
    voyn/prs/hea.1 "VMS Herbal A 1" \
    engl/cul/tot.1 "English - Culpeper Herbal"

  # Voynichese Herbal B and Tibetan:
  make_plot "zipf-voyn-heb-tibe-1" \
    voyn/prs/heb.1 "VMS Herbal B 1" \
    tibe/pmi/tot.1 "Tibetan - Illusion" \
    tibe/ccv/tot.1 "Tibetan - Comment"

fi

if (( euro != 0 )); then

  echo "### English ###" 1>&2

  # English, 17th and 19th centuries:
  make_plot "zipf-engl-0" \
    engl/cul/tot.1 "Culpeper Herbal" \
    engl/wow/tot.1 "Wells War of Worlds"

  # English, 15th, 17th, and 19th centuries:
  make_plot "zipf-engl-1" \
    engl/twp/tot.1 "Towneley Plays" \
    engl/cul/tot.1 "Culpeper Herbal" \
    engl/wow/tot.1 "Wells War of Worlds"

  # Bibles in Latin, Greek, Russian:
  make_plot "zipf-euro-2" \
    latn/ptt/tot.1 "Latin Vulgate OT" \
    grek/nwt/tot.1 "Greek Byzantine NT" \
    russ/ptr/tot.1 "Russian Synodal OT"

  # Novels in Spanish and Portuguese:
  make_plot "zipf-euro-3" \
    span/qvi/one.1 "Spanish - Don Quixote" \
    port/csm/tot.1 "Portug. - Dom Casmurro"

  # Novels in European languages - German, Russian, French, Italian:
  make_plot "zipf-euro-4" \
    germ/sim/tot.1 "German - Simplicissimus" \
    russ/pic/tot.1 "Russian - Roadside Picnic" \
    fran/tal/tot.1 "French - Terre a la Lune" \
    ital/psp/tot.1 "Italian - Promessi Sposi"

  # Spanish, two novels by same author, 10 years apart:
  make_plot "zipf-span-1" \
    span/qvi/one.1 "Don Quixote - Part I" \
    span/qvi/two.1 "Don Quixote - Part II"

  # Russian, 20th century novel and 19th century bible:
  make_plot "zipf-russ-1" \
    russ/pic/tot.1 "Roadside Picnic" \
    russ/ptr/tot.1 "Synodal Pentateuch"

fi

if (( semi != 0 )); then

  echo "### Semitic languages ###" 1>&2

  # Religious Geez, Hebrew, Arabic:
  make_plot "zipf-semi-1" \
    geez/gok/tot.1 "Geez Glory of Kings" \
    hebr/tav/tot.1 "Hebrew Torah " \
    arab/quv/tot.1 "Arabic Quran"

  # Arabic Quran with various spellings:
  make_plot "zipf-semi-2" \
    arab/quv/tot.1 "Arabic Quran - vowels" \
    arab/quf/tot.1 "Arabic Quran - vowels+sukuns" \
    arab/qph/tot.1 "Arabic Quran - phonetic" \
    arab/qcs/tot.1 "Arabic Quran - no vowels" \
    arab/qud/tot.1 "Arabic Quran - devowelled"

  # Arabic Quran with no vowels and devowelled:
  make_plot "zipf-semi-3" \
    arab/qcs/tot.1 "Arabic Quran - no vowels" \
    arab/qud/tot.1 "Arabic Quran - devowelled"

  # Arabic Quran with vowels and devowelled:
  make_plot "zipf-semi-4" \
    arab/quv/tot.1 "Arabic Quran - vowels" \
    arab/qud/tot.1 "Arabic Quran - devowelled"

  # Hebrew Tanakh with vowels and devowelled:
  make_plot "zipf-semi-5" \
    hebr/tav/tot.1 "Hebrew Tanakh - vowels" \
    hebr/tad/tot.1 "Hebrew Tanakh - devoweled"

fi

if (( asia != 0 )); then

  echo "### Asian languages ###" 1>&2

  # Tibetan play, Chinese novel, Vietnamese:
  make_plot "zipf-asia-1" \
    tibe/pmi/tot.1 "Tibetan - Illusion" \
    chin/red/tot.1 "Chinese - Red Mansion" \
    viet/ptt/tot.1 "Vietnamese - Cadman OT"

  # Three samples of Tibetan:
  make_plot "zipf-tibe-1" \
    tibe/pmi/tot.1 "Tibetan - Illusion" \
    tibe/vim/tot.1 "Tibetan - Vimalakirti" \
    tibe/ccv/tot.1 "Tibetan - Comment"

  # Various samples of Chinese:
  make_plot "zipf-chin-1" \
    chin/red/tot.1 "Red Mansion" \
    chin/ptt/tot.1 "Union OT - Pentateuch" \
    chin/ptn/tot.1 "New Trans OT - Pentateuch" \
    chin/voa/tot.1 "V. of Amer. (ideograms)" \
    chip/voa/tot.1 "V. of Amer. (pinyin)"

  # Two samples of Western translated into Vietnamese:
  make_plot "zipf-viet-1" \
    viet/ptt/tot.1 "Cadman's OT - Pentateuch" \
    viet/nwt/tot.1 "Catholic NT - Gospels"

fi

if (( synt != 0 )); then

  echo "### Synthetic and encrypted texts ###" 1>&2

  # Native Chinese in plain and in Roman Code:
  make_plot "zipf-chin-2" \
    chin/red/tot.1 "Red Mansion - (ideograms)" \
    chrc/red/tot.1 "Red Mansion - Roman codes"

  # English encoded in various ways:
  make_plot "zipf-code-1" \
    engl/wow/tot.1 "Wells WotW - plain" \
    enrc/wow/tot.1 "Wells WotW - Roman code" \
    envg/wow/tot.1 "Wells WotW - Vigenere"

  # Dialects of Gruggish:
  make_plot "zipf-rugg" \
    voyp/grs/tot.1 "Rugg's text (sfw)" \
    voyp/grm/tot.1 "Rugg's text (hand)"

  # Rugg's text (software simulated) versus Voynichese:
  make_plot "zipf-rugg-voyn-1" \
    voyp/grs/tot.1 "Rugg's text (sfw)" \
    voyn/prs/hea.1 "VMS Herbal A" \
    voyn/prs/heb.1 "VMS Herbal B"

  # Rugg's text (hand-produced) versus Voynichese:
  make_plot "zipf-rugg-voyn-2" \
    voyp/grm/tot.1 "Rugg's text (hand)" \
    voyn/prs/hea.1 "VMS Herbal A" \
    voyn/prs/heb.1 "VMS Herbal B"

  # Rugg's text (hand), Voynichese Biology, whole text:
  make_plot "zipf-rugg-voyn-3" \
    voyp/grm/tot.1 "Rugg's text (hand)" \
    voyn/prs/bio.1 "VMS Biology" \
    voyn/prs/tot.1 "VMS whole - prose"

  # Rugg's text (hand), Voynichese Herbal A, and Greek NT:
  make_plot "zipf-voyn-hea-euro-1" \
    voyp/grm/tot.1 "Rugg's text (hand)" \
    voyn/prs/hea.1 "VMS Herbal A" \
    grek/nwt/tot.1 "Greek Byzantine NT"

  # Voynichese Herbal B, Rugg's text (sfw):
  make_plot "zipf-voyn-heb-rugg-1" \
    voyn/prs/heb.1 "VMS Herbal B" \
    voyp/grs/tot.1 "Rugg's text (sfw)"

  # Rugg's text (sfw), and Tibetan:
  make_plot "zipf-tibe-rugg-2" \
    voyp/grs/tot.1 "Rugg's text (sfw)" \
    tibe/pmi/tot.1 "Tibetan - Illusion" \
    tibe/ccv/tot.1 "Tibetan - Comment"

  # Vietnamese and Monkey text:
  make_plot "zipf-monk-1" \
    viet/ptt/tot.1 "Cadman's OT - Pentateuch" \
    viep/mky/tot.1 "Monkey synth"

  # Tibetan and Monkey text:
  make_plot "zipf-monk-2" \
    tibe/pmi/tot.1 "Tibetan - Illusion" \
    tibe/ccv/tot.1 "Tibetan - Comment" \
    viep/mky/tot.1 "Monkey synth"

  # Files of pseudo-Vietnamese, with Vietnamese and English for comparison
  # (five inputs in all):
  make_plot "zipf-viep-1" \
    viep/grs/tot.1 "Rugg's text (sfw)" \
    viet/ptt/tot.1 "Cadman's OT - Pentateuch" \
    viep/mky/tot.1 "Monkey synth" \
    envt/wow/tot.1 "Wells WotW - Viet code" \
    engl/wow/tot.1 "Wells WotW - plain"

fi