#! /usr/bin/gawk -f
# Last edited on 2002-03-04 21:39:46 by stolfi

# Reads an EVMT-format file, with basified weirdos.  Removes all
# comments, fillers, and empty lines, turns all spaces and breaks into " ",
# inserts a beginning-of-paragraph ("=") or beginning-of-line ("-")
# in front of the text, and removes the "<>" around the line locator.
#
# Each output record has four space-separated parts:
#   LOCATOR  START  TEXT  END
# where START is "=" if the line begins a paragraph (else "-") and
# END is "=" if the line ends a paragraph (else "-").
#
# On a malformed locator a message is written to /dev/stderr and the
# program exits with status 1.

BEGIN {
  abort = -1;        # Exit status requested by data_error(); -1 means "none".
  parag_break = 1;   # The start of the input counts as a paragraph break.
}

# Stop as soon as an error status has been recorded.
(abort >= 0) { exit abort; }

# Skip blank lines and whole-line "#" comments.
/^ *([#]|$)/ { next; }

# Skip lines that carry a locator but no text (nothing, or only a "#" comment).
/^<[^>]*> *([#]|$)/ { next; }

# Every remaining line must be "<locator>text".
{
  # Split line into locator and text:
  if (! match($0, /^[<][f][0-9]+[rv][0-9]*[.][A-Za-z][0-9]*[.][0-9]+[a-z]?([;][A-Z])?[>]/))
    { data_error("bad locator format"); }
  loc = substr($0, 2, RLENGTH-2);   # Locator without the enclosing "<" and ">".
  lin = substr($0, RLENGTH+1);      # Everything after the closing ">".

  # Remove internal comments and fillers.  The brace pattern only matches
  # innermost "{...}" groups, so it is applied twice to also strip groups
  # that contained one nested level.
  gsub(/[{][^{}]*[}]/, "", lin);
  gsub(/[{][^{}]*[}]/, "", lin);
  gsub(/[!]/, "", lin);
  gsub(/[?%]/, "*", lin);

  # Decide whether line is parag-initial.  The locator minus its ";X"
  # version suffix identifies the line; the decision is taken once, on the
  # first version seen, and reused for the remaining versions.
  nlin = loc;
  gsub(/[;][A-Za-z]+$/, "", nlin);
  if (nlin != onlin)
    { first_in_parag = parag_break;
      parag_break = 0;   # Start accumulating afresh for this line.
      onlin = nlin;
    }

  # Remember whether this version of the line was parag-final, i.e. its
  # text ends with "=" optionally followed by other space/break characters.
  last_in_parag = (lin ~ /[=][-\/., ]*$/);

  # Replace all EVA spaces by ascii space, and trim both ends:
  gsub(/[-\/=., ]+/, " ", lin);
  gsub(/^[ ]+/, "", lin);
  gsub(/[ ]+$/, "", lin);

  # Write out:
  if (lin != "")
    { print loc, (first_in_parag ? "=" : "-" ), lin, (last_in_parag ? "=" : "-" ); }

  # The next line starts a paragraph if ANY version of this line was
  # parag-final, so accumulate across versions instead of letting the
  # last version overwrite the flag.
  parag_break = (parag_break || last_in_parag);
  next;
}

# Reports a fatal input error: prints MSG with the current line number
# (FNR) to /dev/stderr, records exit status 1 in `abort`, and exits.
function data_error(msg)
{
  printf "*** line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}