#! /usr/bin/gawk -f
# Usage: $0 < INFILE > OUTFILE
# Removes all EVMT comments and formatting.
# Leaves only the Voynich text.
#
# Per input line:
#   - lines starting with "#" are dropped;
#   - empty lines match no rule and so produce no output;
#   - a line starting with "<" is treated as carrying a location field in
#     its first `loc_width` columns; the field is checked and stripped;
#   - whatever remains is passed through cleanup() and printed.
# Complaints about malformed location fields are written to /dev/stderr.

BEGIN {
  # When abort becomes >= 0 the script exits with that status at the next
  # record.  No rule visible in this file sets it; it stays at -1.
  abort = -1;

  # Number of leading columns occupied by the location field.
  loc_width = 19;
}

(abort >= 0) { exit abort; }

# Whole-line comments.
/^#/ { next; }

# Any non-empty line.
/./ {
  skip = 0;
  loc = "";

  if (substr($0, 1, 1) == "<") {
    skip = loc_width;
    loc = substr($0, 1, loc_width);
    gsub(/ *$/, "", loc);    # the field is blank-padded on the right
    if (!is_valid_location(loc)) {
      printf "line %d, bad location \"%s\"\n", NR, loc > "/dev/stderr"
    }
  }

  # Nothing follows the location field: no text to emit.
  if (skip >= length($0)) { next; }

  txt = cleanup(substr($0, 1 + skip));
  print txt;
  next;
}

function is_valid_location(loc)
{
  # Returns 1 when `loc` is a single "<...>" group with a non-empty body
  # and no nested angle brackets, 0 otherwise.
  #
  # The previous test, `loc !~ /^$/`, was true for every location field
  # (the field always starts with "<", so it is never empty) and therefore
  # reported every located line as bad.
  #
  # NOTE(review): only the bracket shape is checked here; the exact
  # locator grammar is not visible in this file -- tighten if known.
  return (loc ~ /^<[^<>]+>$/);
}

function cleanup(txt)
{
  # Removes crud from `txt` and returns the result.
  # The order of the steps matters: blanks are deleted before "%" is
  # turned into a blank, so only "%" yields blanks in the output.

  # Delete blanks and "!" outright; turn each "%" into a single blank.
  gsub(/[ !]/, "", txt);
  gsub(/[%]/, " ", txt);

  # We discard "{}"-comments (one pass; only brace pairs that contain
  # no other brace are removed):
  gsub(/\{[^{}]*\}/, "", txt);

  # We choose arbitrarily the first of alternative transcriptions,
  # i.e. "[a|b|c]" becomes "a":
  gsub(/\[/, "", txt);           # drop the opening bracket
  gsub(/\|[^\]]*\]/, "", txt);   # drop "|b|c]"
  gsub(/\]/, "", txt);           # drop a "]" left by a group without "|"

  return txt
}