# Attributes of samples and subsections # Last edited on 2023-05-14 13:38:05 by stolfi # # Fields are # # SUBDIR SOURCE GUDNUM KEY DESCR # # where # # SUBDIR is LANG/BUK/TAG.N (in "dat" directory). # SOURCE is LANG/BUK (in "langbank" directory). # GUDNUM is number of good words to take in this section. # KEY is a short id for plot keys. # DESCR is a documentation string. # # The last section of any sample *must* be "tot.1". #---------------------------------------------------------------------- # Biblical Hebrew in JSHB hebr/tav/gen.1 - 35027 HBV-1 Hebrew_Bible_vov_1_Genesis hebr/tav/exo.1 - 35027 HBV-2 Hebrew_Bible_vov_2_Exodus hebr/tav/num.1 - 35027 HBV-3 Hebrew_Bible_vov_3_Numeri hebr/tav/lev.1 - 35027 HBV-4 Hebrew_Bible_vov_4_Leviticus hebr/tav/deu.1 - 35027 HBV-5 Hebrew_Bible_vov_5_Deuteronomium hebr/tav/tot.1 hebr/tan 35027 HBV Hebrew_Bible_vov_1-5_Torah hebr/tad/tot.1 hebr/tan 35027 HBD Hebrew_Bible_dev_1-5_Torah #---------------------------------------------------------------------- # Ge`ez in SERA geez/gok/tot.1 geez/gok 35027 GoK Glory_of_the_Kings geez/eno/tot.1 geez/eno 35027 Eno First_Book_of_Enoch #---------------------------------------------------------------------- # Modern English in ISO Latin-1: engl/wow/tot.1 engl/wow 35027 WoW War_of_the_Worlds engl/wnm/tot.1 engl/wow 35027 WoW-N War_of_the_Worlds_-_Names #---------------------------------------------------------------------- # 1600's English in ISO Latin-1: engl/cul/pre.1 - 35027 CpH-P Culpeper's_Herbal_-_Preamble engl/cul/her.1 - 35027 CpH-H Culpeper's_Herbal_-_Herbs engl/cul/rec.1 - 35027 CpH-R Culpeper's_Herbal_-_Recipes engl/cul/tot.1 engl/cul 35027 CpH Culpeper's_Herbal engl/cpn/tot.1 engl/cul 35027 CpH-N Culpeper's_Herbal_-_Plant_Names #---------------------------------------------------------------------- # Middle English (1460) in ISO Latin-1: engl/twp/tot.1 engl/twp 35027 TwP Towneley_Plays #---------------------------------------------------------------------- # Early Church Latin in 
ISO Latin-1: latn/ptt/gen.1 - 35027 VOT-1 Vulgate_OT_1_Genesis latn/ptt/exo.1 - 35027 VOT-2 Vulgate_OT_2_Exodus latn/ptt/num.1 - 35027 VOT-3 Vulgate_OT_3_Numeri latn/ptt/lev.1 - 35027 VOT-4 Vulgate_OT_4_Leviticus latn/ptt/deu.1 - 35027 VOT-5 Vulgate_OT_5_Deuteronomium latn/ptt/tot.1 latn/ptt 35027 VOT-P Vulgate_OT_1-5_Pentateuch latn/nwt/mat.1 - 35027 VNT-1 Vulgate_NT_1_Matthew latn/nwt/mrk.1 - 35027 VNT-2 Vulgate_NT_Mark latn/nwt/luk.1 - 35027 VNT-3 Vulgate_NT_Luke latn/nwt/joh.1 - 35027 VNT-4 Vulgate_NT_John latn/nwt/tot.1 latn/nwt 35027 VNT-G Vulgate_NT_Gospels #---------------------------------------------------------------------- # 1300's Academic Latin in ISO Latin-1: latn/ock/tot.1 latn/ock 35027 Ock Ockam's_Dialogus #---------------------------------------------------------------------- # Early Church Greek (Byzantine Koiné) in JSGR: grek/nwt/mat.1 - 35027 BNT-1 Byzantine_NT_1_Matthew grek/nwt/mrk.1 - 35027 BNT-2 Byzantine_NT_2_Mark grek/nwt/luk.1 - 35027 BNT-3 Byzantine_NT_3_Luke grek/nwt/joh.1 - 35027 BNT-4 Byzantine_NT_4_John grek/nwt/tot.1 grek/nwt 35027 BNT-G Byzantine_NT_1-4_Gospels #---------------------------------------------------------------------- # 1600's Spanish in ISO Latin-1: span/qvi/one.1 span/qvi 35027 DQux-1 Don_Quixote_Part_I span/qvi/two.1 span/qvi 35027 DQux-2 Don_Quixote_Part_II span/qvi/tot.1 span/qvi 35027 DQux Don_Quixote #---------------------------------------------------------------------- # Modern Italian in ISO Latin-1: ital/psp/tot.1 ital/psp 35027 PrSp Promessi_Sposi #---------------------------------------------------------------------- # Modern French in ISO Latin-1: fran/tal/tot.1 fran/tal 35027 TrLn De_la_Terre_a_la_lune #---------------------------------------------------------------------- # Modern Portuguese in ISO Latin-1: port/csm/tot.1 port/csm 35027 DCsm Dom_Casmurro #---------------------------------------------------------------------- # Modernized German in ISO Latin-1: germ/sim/tot.1 germ/sim 35027 Simp 
Simplicissimus_Teutsch #---------------------------------------------------------------------- # Russian transliterated into Latin-1: russ/pic/tot.1 russ/pic 35027 RPic Piknik_na_obochine #---------------------------------------------------------------------- # Russian in KOI8-R: russ/ptt/gen.1 - 35027 SOT-1 Synodal_OT_1_Genesis russ/ptt/exo.1 - 35027 SOT-2 Synodal_OT_2_Exodus russ/ptt/num.1 - 35027 SOT-3 Synodal_OT_3_Numeri russ/ptt/lev.1 - 35027 SOT-4 Synodal_OT_4_Leviticus russ/ptt/deu.1 - 35027 SOT-5 Synodal_OT_5_Deuteronomium russ/ptt/tot.1 russ/ptt 35027 SOT-P Synodal_OT_1-5_Pentateuch #---------------------------------------------------------------------- # Russian romanized: russ/ptr/gen.1 - 35027 SOTR-1 Synodal_OT_Rom_1_Genesis russ/ptr/exo.1 - 35027 SOTR-2 Synodal_OT_Rom_2_Exodus russ/ptr/num.1 - 35027 SOTR-3 Synodal_OT_Rom_3_Numeri russ/ptr/lev.1 - 35027 SOTR-4 Synodal_OT_Rom_4_Leviticus russ/ptr/deu.1 - 35027 SOTR-5 Synodal_OT_Rom_5_Deuteronomium russ/ptr/tot.1 russ/ptt 35027 SOTR-P Synodal_OT_Rom_1-5_Pentateuch #---------------------------------------------------------------------- # Classical Arabic in JSAR arab/quf/tot.1 arab/quv 35027 Qur-F Holy_Quran_-_Vowels_Sukuns arab/quv/tot.1 arab/quv 35027 Qur-V Holy_Quran_-_Vowels arab/qud/tot.1 arab/quv 35027 Qur-D Holy_Quran_-_Devowelled arab/qph/tot.1 arab/qph 35027 Qur-P Holy_Quran_-_Semi-Phonetic arab/qcs/tot.1 arab/qcs 35027 Qur Holy_Quran_-_Consonants #---------------------------------------------------------------------- # Vietnamese in VIQR viet/ptt/gen.1 - 35027 COT-1 Cadman_OT_1_Genesis viet/ptt/exo.1 - 35027 COT-2 Cadman_OT_2_Exodus viet/ptt/num.1 - 35027 COT-3 Cadman_OT_3_Numeri viet/ptt/lev.1 - 35027 COT-4 Cadman_OT_4_Leviticus viet/ptt/deu.1 - 35027 COT-5 Cadman_OT_5_Deuteronomium viet/ptt/tot.1 viet/ptt 35027 COT-P Cadman_OT_1-5_Pentateuch viet/nwt/mat.1 - 35027 CNT-1 Catholic_NT_1_Matthew viet/nwt/mrk.1 - 35027 CNT-2 Catholic_NT_2_Mark viet/nwt/luk.1 - 35027 CNT-3 Catholic_NT_3_Luke 
viet/nwt/jhn.1 - 35027 CNT-4 Catholic_NT_4_John viet/nwt/tot.1 viet/nwt 35027 CNT-G Catholic_NT_1-4_Gospels #---------------------------------------------------------------------- # Modern Bible Mandarin Chinese in ideograms (Guo Biao) chin/ptt/gen.1 - 35027 UOT-1 Union_OT_1_Genesis chin/ptt/exo.1 - 35027 UOT-2 Union_OT_2_Exodus chin/ptt/num.1 - 35027 UOT-3 Union_OT_3_Numeri chin/ptt/lev.1 - 35027 UOT-4 Union_OT_4_Leviticus chin/ptt/deu.1 - 35027 UOT-5 Union_OT_5_Deuteronomium chin/ptt/tot.1 chin/ptt 35027 UOT-P Union_OT_1-5_Pentateuch chin/ptn/gen.1 - 35027 NOT-1 NewTrans_OT_1_Genesis chin/ptn/exo.1 - 35027 NOT-2 NewTrans_OT_2_Exodus chin/ptn/num.1 - 35027 NOT-3 NewTrans_OT_3_Numeri chin/ptn/lev.1 - 35027 NOT-4 NewTrans_OT_4_Leviticus chin/ptn/deu.1 - 35027 NOT-5 NewTrans_OT_5_Deuteronomium chin/ptn/tot.1 chin/ptn 35027 NOT-P NewTrans_OT_1-5_Pentateuch #---------------------------------------------------------------------- # 1700's Mandarin Chinese in ideograms (Guo Biao) chin/red/tot.1 chin/red 35027 Red Dream_of_Red_Mansion #---------------------------------------------------------------------- # Modern News Mandarin Chinese in ideograms (Guo Biao) chin/voa/tot.1 chin/voa 35027 VoA-G Voice_of_America_News_-_Ideograms #---------------------------------------------------------------------- # Modern News Mandarin Chinese in Pinyin chip/voa/tot.1 chip/voa 35027 VoA-P Voice_of_America_News_-_Pinyin #---------------------------------------------------------------------- # 600's Tibetan in ACIP-JS tibe/vim/tot.1 tibe/vim 35027 Vim Vimalakirti_Sutra tibe/ccv/tot.1 tibe/ccv 35027 CVR Comm_on_Comm_on_Valid_Reasoning #---------------------------------------------------------------------- # Modern Tibetan in ACIP-JS tibe/pmi/tot.1 tibe/pmi 35027 PMI Play_of_Mistaken_Illusion #---------------------------------------------------------------------- # Chinese encoded as "Roman-Voynichese" Numerals chrc/red/tot.1 chin/red 35027 Red-X Dream_of_Red_Mansion_-_Roman_Code 
#---------------------------------------------------------------------- # English words encoded as Roman Numerals enrc/wow/tot.1 engl/wow 35027 WoW-X War_of_the_Worlds_-_Roman_Code #---------------------------------------------------------------------- # English words encoded as Vietnamese syllables # Not enough syllables for that! envt/wow/tot.1 engl/wow 35027 WoW-Y War_of_the_Worlds_-_Vietnamese_Code #---------------------------------------------------------------------- # English words in Vigenère cipher envg/wow/tot.1 engl/wow 35027 WoW-Z War_of_the_Worlds_-_Vigenere_Cipher #---------------------------------------------------------------------- # Gordon Rugg's Pseudo-Voynichese in EVA voyp/grs/tot.1 voyp/grs 35027 Rug-S Rugg's_Pseudo-Voynichese_-_Software voyp/grm/tot.1 voyp/grm 35027 Rug-M Rugg's_Pseudo-Voynichese_-_Manual #---------------------------------------------------------------------- # Pseudo-Vietnamese by Rugg's method viep/grs/tot.1 viep/grs 35027 Rug-V Rugg's_Pseudo_Vietnamese_-_Software #---------------------------------------------------------------------- # Pseudo-Vietnamese by first-order Monkey viep/mky/tot.1 viep/mky 35027 Mky-V Monkey_Vietnamese_-_Software