#! /usr/bin/gawk -f
# Last edited on 2002-03-05 01:42:49 by stolfi

# Reads a file containing records of the form
#
#   SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD
#   1   2    3    4    5    6    7    8    9     10    11
#
# Reads also a table (file name given by the variable `table') whose
# entries have the form SWORD HEAD, where SWORD is a special word and
# HEAD is its canonical form (report heading).
#
# Outputs every input record whose WORD is either equal to some
# SWORD (strong occurrence), or to some SWORD minus an initial gallows
# letter, i.e. one of [ktpf] (weak occurrence).  Multiple matches are
# printed multiple times.  Appends to each output record the
# corresponding HEAD ($12) and a digit TAG ($13) which is 2 for a
# strong occurrence with FPOS ($7) equal to 1, 1 for a strong
# occurrence elsewhere, and 0 for a weak occurrence.

BEGIN {
  abort = -1;
  usage = ( \
    "cat INFILE \\\n" \
    " | assign-headings \\\n" \
    " -v table=TBLFILE \\\n" \
    " > OUTFILE " \
  );

  split("", head);   # `head[w]' is the heading for word `w'.
  split("", weak);   # `weak[w]' is a comma-separated list of the headings
                     #   of which `w' is a weak occurrence.
  split("", isweak); # `isweak[w,h]' is 1 iff `w' is a weak occurrence of `h'.

  if (table == "") { arg_error("must define \"table\""); }
  read_table(table, head, weak);
}

# Stop before touching any input if an error was flagged earlier.
(abort >= 0) { exit abort; }

# Well-formed data record: report strong and weak occurrences.
(NF == 11) {
  fpos = $7;
  w = $11;
  if (w in head) {
    # Strong occurrence: `w' is a variant of heading `head[w]'.
    print $0, head[w], (fpos == 1 ? 2 : 1);
  }
  if (w in weak) {
    # Weak occurrence: `w' is some table word minus its leading gallows;
    # `weak[w]' lists every heading reached that way.
    nhs = split(weak[w], hs, ",");
    for (i = 1; i <= nhs; i++) {
      print $0, hs[i], 0;
    }
  }
  next;
}

# Any other non-empty line is malformed.
/./ { data_error("bad line type"); }

function read_table(fname,head,weak,   ntbl,nlin,lin,fld,nfld,w,h,status)
{
  # Loads the SWORD/HEAD table from file `fname'.
  #
  #   fname  name of the table file.
  #   head   (out) `head[SWORD]' is set to HEAD for every table entry.
  #   weak   (out) `weak[w]' is set to the comma-separated list of the
  #          headings of all SWORDs that reduce to `w' when their
  #          initial [ktpf] letter is removed.
  #
  # Also updates the global array `isweak', which is used here to keep
  # each heading from being listed more than once in `weak[w]'.
  #
  # Blank lines and lines starting with `#' are skipped; trailing
  # `#' comments are stripped.  Aborts through `tbl_error' on a
  # malformed or repeated entry or on a read error, and through
  # `arg_error' if no line at all could be read.

  ntbl = 0;
  nlin = 0;
  while ((status = (getline lin < fname)) > 0) {
    nlin++;
    if (lin ~ /^[ \011]*([#]|$)/) { continue; }

    gsub(/[#].*$/, "", lin);
    nfld = split(lin, fld, " ");
    if (nfld != 2) {
      tbl_error(fname, nlin, ("bad table entry = \"" lin "\""));
    }
    w = fld[1];
    h = fld[2];
    if (w in head) {
      tbl_error(fname, nlin, ("repeated key = \"" lin "\""));
    }
    head[w] = h;

    if (w ~ /^[ktpf]/) {
      # Register the word minus its initial gallows as a weak form of `h'.
      w = substr(w, 2);
      if (! (w in weak)) {
        weak[w] = h;
      } else if (! ((w,h) in isweak)) {
        weak[w] = (weak[w] "," h);
      }
      isweak[w,h] = 1;
    }
    ntbl++;
  }

  # Use the return value of `getline' (negative on error) to detect a
  # failure, instead of comparing ERRNO with "0": ERRNO is only
  # meaningful after a failure and is not guaranteed to hold "0"
  # when everything went well.
  if (status < 0) { tbl_error(fname, nlin, ERRNO); }
  close(fname);
  if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
  # printf "loaded %6d table pairs\n", ntbl > "/dev/stderr"
}

function tbl_error(f,n,msg)
{
  # Reports an error at line `n' of table file `f' and exits with status 1.
  printf "file %s, line %d: %s\n", f, n, msg > "/dev/stderr";
  abort = 1;
  exit 1
}

function data_error(msg)
{
  # Reports an error at the current input line and exits with status 1.
  printf "*** line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}

function arg_error(msg)
{
  # Reports a command-line error, prints the usage text, exits with status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1
}