#! /usr/bin/gawk -f
# Last edited on 1999-01-29 21:04:55 by stolfi

# Usage: $0 < INFILE > OUTFILE
#
# Removes all comments from an evt-like file.
# Chooses the first alternative of every "[...|...|...]" group.
# Leaves only the location code (if any) and the Voynich text.
#
# Line handling, in rule order:
#   1. Lines starting with "#" are dropped.
#   2. Lines containing a "<...>" group with no "." or ";" inside it are
#      copied to the output verbatim (nothing is stripped from them).
#   3. Any other non-empty line is split into a location field (the first
#      19 columns, only when the line starts with "<") and the text that
#      follows; the text is passed through cleanup() and the pair is
#      printed with the location left-justified in 19 columns.
# Empty lines match no rule and are therefore dropped.

# "abort" is the exit status requested by the script; -1 means "keep going".
# Nothing in this file currently sets it to a non-negative value.
BEGIN { abort = -1; }

(abort >= 0) { exit abort; }

# Whole-line "#" comments.
/^#/ { next; }

# Lines carrying a "<...>" group without "." or ";" are passed through as is.
/<[^.;]*>/ { print; next; }

# Text lines, with or without a leading location code.
/./ {
  if (substr($0, 1, 1) == "<") {
    # The location code occupies a fixed field of 19 columns.
    skip = 19;
    loc = substr($0, 1, 19);
    gsub(/ *$/, "", loc);
    # Once the trailing padding is removed, the field must be exactly one
    # complete "<...>" token.  Anything else means the locator is truncated,
    # overflows the field, or has text running into it.  (The previous test,
    # loc !~ /^$/, was true for every locator and so flagged every line.)
    if (loc !~ /^<[^<> \t]+>$/) {
      printf "line %d, bad location \"%s\"\n", NR, loc > "/dev/stderr"
    }
  } else {
    skip = 0;
    loc = "";
  }

  # Nothing follows the location field: there is no text to print.
  if (skip >= length($0)) next;

  txt = cleanup(substr($0, 1 + skip));
  printf "%-19s%s\n", loc, txt;
  next;
}

# cleanup(txt): returns txt with the transcription markup removed.
#   - blanks and "!" are deleted;
#   - each "%" is replaced by a single blank;
#   - "{...}" comments are deleted;
#   - each "[a|b|...]" group is reduced to its first alternative "a".
# The argument is a by-value string parameter, so the caller's copy is
# left untouched.
function cleanup(txt)
{
  # Delete blanks and "!", then turn "%" into a blank.  The order matters:
  # the blanks produced from "%" must survive.
  gsub(/[ !]/, "", txt);
  gsub(/[%]/, " ", txt);

  # Discard "{}"-comments.
  gsub(/\{[^}]*\}/, "", txt);

  # Choose arbitrarily the first of alternative transcriptions: drop the
  # opening "[", then everything from the first "|" through the closing "]",
  # then any "]" left over from a group that had no "|".
  gsub(/\[/, "", txt);
  gsub(/\|[^\]]*\]/, "", txt);
  gsub(/\]/, "", txt);

  return txt;
}