#! /usr/bin/gawk -f
# Recoding Voynich text (or interlinear file)
# from FSG to ECC (my error-tolerant lossy encoding)

BEGIN {
    # Non-zero until the banner comment has been printed (see the
    # comment-line rule below).
    pcmt = 1
}

# ecc_apply(txt, rules) - run a sequence of global substitutions on txt.
#
#   txt    the string to rewrite
#   rules  whitespace-separated list of PATTERN=REPLACEMENT pairs
#
# The pairs are applied strictly from left to right, each one as
# gsub(PATTERN, REPLACEMENT) with PATTERN used as a dynamic regexp, so
# every pair sees the output of all the pairs before it.
# Returns the rewritten string.  (n, i, pair, kv are locals.)
function ecc_apply(txt, rules,    n, i, pair, kv) {
    n = split(rules, pair, " ")
    for (i = 1; i <= n; i++) {
        split(pair[i], kv, "=")
        gsub(kv[1], kv[2], txt)
    }
    return txt
}

# ecc(txt) - convert an FSG-encoded string to the ECC encoding.
#
# Works in three stages: strip transcription markup, expand FSG into
# JSA (Stolfi's super-analytic alphabet), then collapse JSA into ECC.
# Returns the converted string.
function ecc(txt) {
    # --- Stage 1: markup clean-up -------------------------------------

    # "%", "!" and blanks go away: the conversion destroys
    # synchronism anyway, so keeping them serves no purpose.
    gsub(/[% !]/, "", txt)

    # Inline {...} comments are dropped.
    gsub(/\{[^}]*\}/, "", txt)

    # For alternative transcriptions [a|b|...], arbitrarily keep the
    # first one: remove "[", then everything from "|" up to "]", then
    # any "]" that is still left.
    gsub(/\[/, "", txt)
    gsub(/\|[^\]]*\]/, "", txt)
    gsub(/\]/, "", txt)

    # Word spaces ("." in the evt format) are not reliable, so they
    # are discarded as well.
    gsub(/[.]/, "", txt)

    # --- Stage 2: FSG -> JSA ------------------------------------------
    # The order of the pairs is the order in which they are applied.
    txt = ecc_apply(txt, "IIIK=iiiij IIIL=iiiiu IIIR=iiiis IIIE=iiiix")
    txt = ecc_apply(txt, "IIE=iiix IIR=iiis IIK=iiij")
    txt = ecc_apply(txt, "HZ=cqjc PZ=cqgc DZ=cljc FZ=clgc")
    txt = ecc_apply(txt, "IE=iix IR=iis IK=iij")
    txt = ecc_apply(txt, "2=cs 4=q 6=cj 7=ig 8=cg")
    txt = ecc_apply(txt, "A=ci C=c D=lj E=ix F=lg G=cy H=qj I=i")
    txt = ecc_apply(txt, "K=ij L=iu M=iiiu N=iiu O=o P=qg R=is")
    # "S" was "csc" in JSA; here it gets the same expansion as "T".
    txt = ecc_apply(txt, "S=cc T=cc")
    txt = ecc_apply(txt, "V=? Y=?")

    # --- Stage 3: JSA -> ECC ------------------------------------------
    txt = ecc_apply(txt, "[ql]j=H [ql]g=P")
    txt = ecc_apply(txt, "ix=e ij=k is=r iu=m")
    txt = ecc_apply(txt, "y=i ci=a cg=8 ig=8 cs=r")
    # A run of "i" directly before e/k/r/m is absorbed into that letter.
    txt = ecc_apply(txt, "ii*e=e ii*k=k ii*r=r ii*m=m")
    txt = ecc_apply(txt, "a=o")

    return txt
}

# Blank (or all-space) lines are copied through unchanged.
/^ *$/ {
    print
    next
}

# Comment lines are copied through unchanged; the very first one is
# preceded by a one-time banner identifying this filter's output.
/^ *#/ {
    if (pcmt) {
        print "# Output of fsg2ecc - Stolfi's error-tolerant encoding"
        pcmt = 0
    }
    print
    next
}

# Lines consisting solely of a <...> tag are copied through unchanged.
/^<[^>]*> *$/ {
    print
    next
}

/^