#! /usr/bin/gawk -f

# Maps a file in EVMT interlinear format from EVA (European Voynich
# Alphabet) to ERA (EVA reduced for noise removal).
# Leaves '{}' comments and non-EVA symbols (including "%" and "!") alone.

# eva_to_era(txt)
#   Converts a chunk of comment-free EVA text to ERA and returns the result.
#   `txt` is a by-value copy, so the caller's string is not modified.
#
#   The substitutions are applied strictly in the order written, and the
#   order matters:
#     - "sh" -> "ch" runs before "s" -> "r", so "sh" never becomes "rh";
#     - "t" -> "k" runs before the "ckh" rule, so an original "cth" is
#       rewritten exactly like "ckh";
#     - "ei" is collapsed to "o" before "a" and "y" are mapped to "o".
function eva_to_era(txt)
{
  gsub(/sh/, "ch", txt);
  gsub(/s/, "r", txt);
  gsub(/t/, "k", txt);
  gsub(/ckh/, "eke", txt);
  gsub(/cph/, "epe", txt);
  gsub(/cfh/, "efe", txt);
  gsub(/ei/, "o", txt);
  gsub(/a/, "o", txt);
  gsub(/y/, "o", txt);
  # A run of two or more "i" collapses to one; a lone "i" is left as is.
  gsub(/iii*/, "i", txt);
  # "q" is simply deleted.
  gsub(/q/, "", txt);
  return txt
}

# convert(old)
#   Converts a text string that may contain '{}' comments: the text outside
#   the comments goes through eva_to_era(), while each '{...}' comment is
#   copied to the result unchanged.  Returns the converted string.
#
#   If a "{" has no matching "}", a diagnostic carrying the current record
#   number (NR) is written to /dev/stderr and the remainder of the string,
#   starting at that "{", is copied through unconverted.
#
#   `new` and `i` are local work variables (awk's extra-parameter idiom);
#   callers pass only `old`.  Keeping `i` local prevents this function from
#   clobbering a global `i` that a caller may be using as a loop index.
function convert(old,    new, i)
{
  new = "";
  while (length(old) != 0)
    {
      i = index(old, "{");
      if (i == 0)
        {
          # No comment left: convert everything that remains.
          new = (new eva_to_era(old));
          old = "";
        }
      else if (i > 1)
        {
          # Plain text precedes the next comment: convert just that prefix.
          new = (new eva_to_era(substr(old, 1, i-1)));
          old = substr(old, i);
        }
      else
        {
          # `old` starts with "{": copy the whole comment through verbatim.
          match(old, /^{[^}]*}/);
          if (RSTART > 0)
            {
              new = (new substr(old, 1, RLENGTH));
              old = substr(old, RLENGTH + 1);
            }
          else
            {
              # Unterminated comment: report it and keep the rest as is.
              printf "line %d, missing '}'\n", NR > "/dev/stderr";
              new = (new old);
              old = "";
            }
        }
    }
  return new;
}

# Blank (or all-space) lines are passed through unchanged.
/^ *$/ { print; next }

# Lines whose first non-space character is "#" are passed through unchanged.
/^ *#/ { print; next; }

# Lines consisting only of a "<...>" tag (plus optional trailing spaces)
# are passed through unchanged.
/^<[^>]*> *$/ { print; next }

/^