#! /bin/gawk -f
# Last edited on 2002-01-16 12:54:50 by stolfi

# Filter: reads records from standard input and treats field number
# `field' of every data record as a word in VIQR Vietnamese encoding,
# possibly factored into letters by braces "{}".  Adds `\' in front of
# the characters that are special to TeX, to allow typesetting in TeX
# (with proper fonts), and writes the modified record to standard output.
# Blank lines and `#' comment lines are copied through unchanged.

BEGIN {
  # `abort' stays negative while no error has been flagged; the error
  # routines below set it to the exit status.
  abort = -1;
  usage = ( "reencode-words-viqr-for-tex \\\n" \
            " [ -v field=NUM ] \\\n" \
            " < INFILE.wct > OUTFILE.tex" \
          );

  if (field == "") { arg_error("must specify \"field\""); }

  # Reject values that cannot be used as a field index.  Without this, a
  # non-numeric value turns the `NF < field' test below into a string
  # comparison (misleading "bad NF" message) and a negative value makes
  # the `$(field)' access fail inside the interpreter.
  if (field !~ /^[0-9]+$/) { arg_error("\"field\" must be a non-negative integer"); }
}

# Guard rule: once an error has been flagged, stop before processing
# any further record.
(abort >= 0) { exit abort; }

# Blank lines and comment lines are passed through untouched.
/^ *([#]|$)/ { print; next; }

# Data records: validate the selected field, re-encode it, and print.
/./ {
  if (NF < field) { data_error("bad NF"); }
  w = $(field);
  # Only the characters listed in this class are accepted in a word.
  if (w !~ /^[-*{}a-zA-Z?(+^.`'~]+$/) { data_error(("bad word \"" w "\"")); }
  w = reencode_viqr_for_tex(w);
  # Assigning to a field makes awk rebuild the record, joining the
  # fields with OFS.
  $(field) = w;
  print;
  next;
}

function reencode_viqr_for_tex(w) {
  # Returns a copy of `w' with a `\' inserted in front of each of the
  # characters ^ { } # & $ %.
  #
  # Protect special characters:
  gsub(/[\^]/, "\\^", w);
  gsub(/[{]/, "\\{", w);
  gsub(/[}]/, "\\}", w);
  gsub(/[#]/, "\\#", w);    # Just in case
  # `&' is special in a gsub replacement: the string "\\&" reaches gsub
  # as `\&', which stands for a literal `&' and would leave the text
  # unchanged.  "\\\\&" reaches gsub as `\\&' and produces `\&'.
  gsub(/[&]/, "\\\\&", w);  # Just in case
  gsub(/[$]/, "\\$", w);    # Just in case
  gsub(/[%]/, "\\%", w);    # Just in case
  return w;
}

function arg_error(msg) {
  # Reports a command-line error `msg' plus the usage text on stderr,
  # then terminates the program with exit status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg) {
  # Reports an input error `msg', tagged with the current record number
  # of the input file, on stderr, then terminates with exit status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}