#! /usr/bin/gawk -f
# Last edited on 2024-07-18 07:59:59 by stolfi

# Format checker for ".sdf", ".csdf", and ".csf" files.
#
# The caller must set the variable {format} (with "-v") to "sdf",
# "csdf", or "csf".  It selects the expected layout of every line:
#
#   sdf:   {SIZE} {DATE} {FNAME}
#   csf:   {CKSUM} {SIZE} {FNAME}
#   csdf:  {CKSUM} {SIZE} {DATE} {FNAME}
#
# A line that passes every check is copied to stdout.  Each failed
# check is reported on {stderr} together with the offending line, and
# a line with at least one failed check is not copied to stdout.
#
# Checks performed:
#   - the number of fields must match the layout;
#   - {CKSUM} must be exactly ten decimal digits;
#   - {SIZE} must be one or more decimal digits;
#   - {DATE} must have the digit pattern "dddd-dd-dd-dddddd";
#   - {FNAME} (always the last field) must not contain a space,
#     newline, single or double quote, semicolon, hashmark, or
#     parenthesis, and must not contain two consecutive slashes.

BEGIN {
  # {format} is mandatory: give up before reading any input without it.
  if (format == "") {
    printf "** must define {format}\n" > "/dev/stderr";
    exit 1
  }

  # Arguments: expected NF, then the field index of the checksum,
  # size, and date; an index of -1 means the layout lacks that field.
  if (format == "sdf") {
    set_layout(3, -1, 1, 2);
  } else if (format == "csf") {
    set_layout(3, 1, 2, -1);
  } else if (format == "csdf") {
    set_layout(4, 1, 2, 3);
  } else {
    printf "** invalid {format} = \"%s\"\n", format > "/dev/stderr";
    exit 1
  }
}

# Validates one input line; every check runs even after an earlier
# one has failed, so a single line may produce several reports.
{
  nbad = 0;

  if (NF != NF_exp) {
    data_error(("bad NF = " NF));
    nbad++;
  }

  if ((kck >= 1) && ($(kck) !~ /^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$/)) {
    data_error(("bad {CKSUM} = \"" $(kck) "\""));
    nbad++;
  }

  if ((ksz >= 1) && ($(ksz) !~ /^[0-9]+$/)) {
    data_error(("bad {SIZE} = \"" $(ksz) "\""));
    nbad++;
  }

  if ((kdt >= 1) && ($(kdt) !~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9]$/)) {
    data_error(("bad {DATE} = \"" $(kdt) "\""));
    nbad++;
  }

  # The file name is whatever field comes last, even when NF is wrong.
  name = $(NF);

  # Characters that are not allowed anywhere in the name.
  if (name ~ /[ '"\012;#()]/) {
    data_error(("bad {FNAME} = \"" name "\""));
    nbad++;
  }

  # Two consecutive slashes are reported separately (same message).
  if (name ~ /[\/][\/]/) {
    data_error(("bad {FNAME} = \"" name "\""));
    nbad++;
  }

  if (nbad == 0) { print; }
  next
}

# Records the line layout chosen in the BEGIN rule: {nf} is the
# required field count; {ck}, {sz}, {dt} are the field indices of the
# checksum, size, and date, or -1 when the field is absent.
function set_layout(nf, ck, sz, dt)
{
  NF_exp = nf;
  kck = ck;
  ksz = sz;
  kdt = dt;
}

# Writes the message {msg} to {stderr}, followed by the current input
# line between guillemets.
function data_error(msg)
{
  printf "** %s: «%s»\n", msg, $0 > "/dev/stderr";
}