#! /usr/bin/gawk -f
# fsg2hop - recode an interlinear file from the FSG alphabet to
# my Lossy Ad-hoc Semi-Analytic Fault-Tolerant encoding (HOP).
#
# Input is read line by line:
#   * blank lines, "#" comment lines, and "<...>" locators containing
#     neither "." nor ";" are copied to the output unchanged;
#   * lines starting with a locator of the form "<unit.line;X> " (X being
#     one capital letter) keep their locator and get their text recoded.
#
# NOTE(review): a line matching none of the rules above is silently
# dropped; confirm that this is intended for malformed locators.

BEGIN {
  print "# Output of fsg2hop - Stolfi's Semi-Analytic Fault-Tolerant alphabet"
}

# Lines that carry no transcribed text: pass through untouched.
/^ *$/       { print; next }
/^ *#/       { print; next }
/^<[^>.;]*>/ { print; next }

# Text lines: keep the locator, recode everything after it.
/^<[^>]*\.[^>]*;[A-Z]> / { print recode_line($0); next }

# Returns `line` with the text after its locator recoded from FSG to HOP.
# The split point is the end of the locator plus its padding spaces rather
# than a fixed column, so a locator that is shorter or longer than the usual
# 19-column field is neither recoded nor does it leave text unconverted.
# No substitution below touches a space, so for column-aligned input the
# result is the same as splitting at column 19/20.
# A line without a recognizable locator is returned unchanged.
function recode_line(line,    cut)
{
  if (!match(line, /^<[^>]*\.[^>]*;[A-Z]> +/)) return line
  cut = RLENGTH
  return substr(line, 1, cut) fsg_to_hop(substr(line, cut + 1))
}

# Returns `txt` converted FSG -> JSA -> HOP.
function fsg_to_hop(txt)
{
  # We discard "%" and "!" since the conversion
  # will destroy synchronism anyway.
  gsub(/[%!]/, "", txt)
  return jsa_to_hop(fsg_to_jsa(txt))
}

# First stage: FSG to JSA (Stolfi's super-analytic).
# Order matters: multi-character FSG groups are rewritten before the
# single characters they are made of, and every replacement is lowercase
# (or "?"), so a later rule never re-reads the output of an earlier one.
function fsg_to_jsa(txt)
{
  # Four-character groups.
  gsub(/IIIK/, "iiiij", txt)
  gsub(/IIIL/, "iiiiu", txt)
  gsub(/IIIR/, "iiiis", txt)
  gsub(/IIIE/, "iiiix", txt)
  # Three-character groups.
  gsub(/IIE/, "iiix", txt)
  gsub(/IIR/, "iiis", txt)
  gsub(/IIK/, "iiij", txt)
  # Two-character groups.
  gsub(/HZ/, "cqjc", txt)
  gsub(/PZ/, "cqgc", txt)
  gsub(/DZ/, "cljc", txt)
  gsub(/FZ/, "clgc", txt)
  gsub(/IE/, "iix", txt)
  gsub(/IR/, "iis", txt)
  gsub(/IK/, "iij", txt)
  # Single digits.
  gsub(/2/, "cs", txt)
  gsub(/4/, "q", txt)
  gsub(/6/, "cj", txt)
  gsub(/7/, "ig", txt)
  gsub(/8/, "cg", txt)
  # Single letters.
  gsub(/A/, "ci", txt)
  gsub(/C/, "c", txt)
  gsub(/D/, "lj", txt)
  gsub(/E/, "ix", txt)
  gsub(/F/, "lg", txt)
  gsub(/G/, "cy", txt)
  gsub(/H/, "qj", txt)
  gsub(/I/, "i", txt)
  gsub(/K/, "ij", txt)
  gsub(/L/, "iu", txt)
  gsub(/M/, "iiiu", txt)
  gsub(/N/, "iiu", txt)
  gsub(/O/, "o", txt)
  gsub(/P/, "qg", txt)
  gsub(/R/, "is", txt)
  gsub(/S/, "csc", txt)
  gsub(/T/, "cc", txt)
  gsub(/V/, "?", txt)
  gsub(/Y/, "?", txt)
  return txt
}

# Second stage: JSA to HOP.
# Order matters here too: several rules consume the output of earlier ones
# (e.g. "iis" -> "ir" -> "w", and "iiu" -> "n" -> "m").
function jsa_to_hop(txt)
{
  gsub(/[ql]j/, "H", txt)
  gsub(/[ql]g/, "P", txt)
  gsub(/cs/, "z", txt)
  gsub(/ij/, "k", txt)
  gsub(/ix/, "e", txt)
  gsub(/is/, "r", txt)
  gsub(/iiu/, "n", txt)
  gsub(/y/, "i", txt)
  gsub(/ci/, "a", txt)
  gsub(/cg/, "8", txt)
  gsub(/ir/, "w", txt)
  # Every "n", together with any run of "i" before it, collapses to "m".
  gsub(/i*n/, "m", txt)
  return txt
}