#! /n/gnu/bin/gawk -f

# Validates the format of the original interlinear file.
# Usage: $0 < infile >& bugs
#
# For every problem found, the offending input line is echoed to stdout and
# a diagnostic of the form "line N: message" is written to /dev/stderr.
#
# NOTE(review): several regular expressions in this file look truncated
# (e.g. the bare /^/ locator pattern and the repeated /^ *$/ and /^ / rules).
# They are kept exactly as found; confirm them against the intended
# location / panel syntax before relying on this validator.

# Reports a problem with the current input record.
#   msg - short description of the problem.
# Echoes the record ($0) to stdout, then writes "line NR: msg" to stderr.
function error(msg)
{
  printf "%s\n", $0;
  printf "line %d: %s\n", NR, msg > "/dev/stderr";
}

# Checks one text line: a location field, blank padding up to column 19,
# and the transcribed text starting at column 20.
#   lin - the full input line.
# Returns 1 when no problem was found, 0 otherwise; every problem found is
# reported through error().
# The names after `lin` are AWK-style locals; callers pass only `lin`.
function checktext(lin,    res, loclen, pad, txt)
{
  res = 1

  if (length(lin) <= 19) { error("missing text"); res = 0 }

  # NOTE(review): /^/ matches the empty string at position 1 of any line, so
  # as written this test never fails and RLENGTH is always 0. The pattern was
  # presumably longer (a locator syntax) -- TODO confirm and restore.
  if (match(lin, /^/) != 1) { error("bad location format"); res = 0 }

  # Width of the matched location; a failed match (RLENGTH == -1) counts as 0
  # so that the padding check then covers columns 1..19.
  loclen = (RLENGTH > 0) ? RLENGTH : 0

  # Columns loclen+1 .. 19 must all be blank. The blank run is generated at
  # the required width instead of being sliced from a fixed literal, which
  # cannot yield more blanks than the literal contains.
  pad = 19 - loclen
  if (pad > 0 && substr(lin, loclen + 1, pad) != sprintf("%" pad "s", ""))
    { error("blanks missing"); res = 0 }

  # The text itself must start exactly at column 20.
  if (substr(lin, 20, 1) == " ") { error("too many blanks"); res = 0 }

  txt = substr(lin, 20, length(lin) - 19)

  # Strip {...} groups, [x|y] alternative readings, "!" characters and
  # trailing blanks before validating the remaining characters.
  gsub(/{[^}]*}/, "", txt);
  gsub(/\[[-*%A-Z.24678]*[|][-*%A-Z.24678]*\]/, "", txt);
  gsub(/!*/, "", txt);
  gsub(/ *$/, "", txt);

  # What remains must begin with a run of allowed characters followed by one
  # of "-", "=" or "%".
  if (txt !~ /^[-*%A-Z.!24678]*[-=%]/)
    { error("invalid char in text"); res = 0 }

  return res
}

# blank lines
/^ *$/ { next }

# comment
/^#/ { next }

# panel declaration
# NOTE(review): identical to the blank-line pattern above, so this rule is
# unreachable as written; the original pattern was presumably different.
/^ *$/ { next }

# sub-panel location declaration
# NOTE(review): same caveat as the panel declaration rule.
/^ *$/ { next }

# line of anonymous text
/^ / {
  checktext($0)
  next
}

# line of text in sub-page location
# NOTE(review): identical to the anonymous-text pattern above, so this rule
# is unreachable as written; the original pattern was presumably different.
/^ / {
  checktext($0)
  next
}

# anything else is malformed
/./ { error("bad format"); next }