#! /usr/bin/gawk -f
# Last edited on 2002-03-01 23:18:47 by stolfi

# Extracts the bare text lines from an EVMT file,
# discarding comments, page headers, and locators.
#
# Input lines are handled as follows:
#   - lines starting with "#" are dropped;
#   - lines that are empty or contain only spaces are dropped;
#   - lines consisting solely of a "<...>" locator (optionally followed
#     by spaces) are dropped;
#   - every other line is cleaned by evmt_bare_text() and printed.
# A line that becomes empty after cleaning is still printed (as an
# empty line), so the output has one line per retained input line.

# Returns the bare text of one data line `line`:
#   1. a leading "<...>" locator and the spaces after it are removed;
#   2. inline "{...}" groups are removed, innermost first, repeating
#      until none remain, so nesting of any depth is handled;
#   3. all "!" characters are removed;
#   4. each run of the characters "-", "/", "=", ".", "," and " " is
#      replaced by a single ".";
#   5. a "." at the very start or very end of the result is removed;
#   6. each "*" and "%" is replaced by "?".
function evmt_bare_text(line) {
  # The pattern is anchored, so it can match at most once: sub() suffices.
  sub(/^<[^>]*> */, "", line);

  # Each pass removes the groups that contain no further braces; the loop
  # ends when a pass removes nothing.  Braces are written as bracket
  # expressions so they can never be read as interval operators.
  while (gsub(/[{][^{}]*[}]/, "", line) > 0) { }

  gsub(/!+/, "", line);

  # Normalize every separator run to one "." before trimming the ends.
  gsub(/[-\/=., ]+/, ".", line);
  sub(/^[.]+/, "", line);
  sub(/[.]+$/, "", line);

  gsub(/[*%]/, "?", line);
  return line;
}

/^#/ { next; }
/^ *$/ { next; }
/^<[^>]*> *$/ { next; }

{ print evmt_bare_text($0); }