#! /usr/bin/gawk -f
# Last edited on 1999-01-06 09:56:48 by stolfi

# Usage: "$0 OCCFILE < TEXT"
#
# Reads a text in EVT format and a file OCCFILE of word occurrences,
# and prints the latter interspersed with the former, properly indented.
#
# Each line of the OCCFILE represents one occurrence of some pattern
# in the TEXT, and it must have the form
#   <LOC> LOCPOS GLOBPOS PAT OBS
# where <LOC> is a location code enclosed in angle brackets, exactly as
# it appears in the TEXT; LOCPOS is the displacement of the pattern's
# occurrence relative to the beginning of that text line (where LOCPOS=0
# means column 20); GLOBPOS is the displacement since the beginning of
# the indexed text; PAT is the pattern in question; and OBS is an
# optional comment (at most 17 bytes long).
#
# The <LOC>s found in the OCCFILE must occur in the same order
# as they appear in the TEXT.
#
# Non-empty lines of TEXT are printed with no change (empty lines match
# no rule and are not echoed).  After each line of TEXT that contains a
# location code <...> starting at column 1, this program prints all lines
# of OCCFILE that have exactly the same location code.  Each such line
# contains the OBS field in column 3, and the PAT field in column
# 20+LOCPOS.
#
# Exit status is 1 after a usage, read, or format error.

function getloc(lin, recno)
{
  # Extracts a location code <...> from the beginning of "lin".
  # "recno" is a human-readable record label used only in the error
  # message.  On a malformed line this calls error(), which terminates
  # the program; the "" return is a fallback value only.
  if (match(lin, /^<[^>]*>/) == 0) {
    error((recno ": bad location = \"" substr(lin,1,19) "\""))
    return ""
  }
  return substr(lin, RSTART, RLENGTH)
}

function readocc(   status)
{
  # Reads a line of the OCCFILE into "olin", splits it into "ofld",
  # defines "oloc" and "oobs", and increments NO.
  #
  # Sets oloc = "" upon end-of-file.
  #
  # "status" is a local variable, not a parameter for callers.
  status = (getline olin < wfile)
  if (status > 0) {
    NO++
    oloc = getloc(olin, ("oc line " NO))
  } else {
    olin = ""; oloc = ""; oobs = ""
    # getline returns 0 at end-of-file and -1 on a read/open error.
    # Only the latter is a failure.  (Testing ERRNO against "0" is not
    # reliable: ERRNO is the empty string when no error has occurred.)
    if (status < 0) { error((wfile ": " ERRNO)) }
  }
  split(olin, ofld)
  oobs = ((5 in ofld) ? ofld[5] : "")
}

BEGIN {
  abort = 0
  usage = (ARGV[0] " OCCFILE < TEXT")
  if (ARGC != 2) { error(("usage: " usage)) }
  wfile = ARGV[1]
  # Hide the OCCFILE operand so that the main loop reads TEXT from stdin.
  ARGC = 1
  # Prime the one-record lookahead (olin/ofld/oloc/oobs).
  readocc()
}

# Lines containing "#" are echoed and never matched against occurrences.
/#/ { print; next }

# Any other non-empty line must begin with a location code.
/./ {
  print
  tloc = getloc($0, ("tx line " NR))
  while ((oloc != "") && (oloc == tloc)) {
    # NOTE(review): the tail of this loop was lost from the source text
    # and has been reconstructed from the header comment (OBS in column
    # 3, PAT in column 20+LOCPOS) -- confirm against a pristine copy.
    pos = 20 + ofld[2]
    lead = ("  " oobs " ")          # two blanks put OBS in column 3
    printf "%s", lead
    # Pad with blanks up to column pos; a long OBS simply pushes PAT right.
    for (j = length(lead) + 1; j < pos; j++) { printf " " }
    printf "%s\n", ofld[4]
    readocc()
  }
  next
}

END {
  if (abort) exit 1
  # Occurrences still pending here had a location that never matched a
  # TEXT line (absent from TEXT, or out of order in OCCFILE).
  if (oloc != "") {
    printf "%s: oc line %d: location %s not found in TEXT (missing or out of order)\n", \
      wfile, NO, oloc > "/dev/stderr"
  }
}

function error(msg)
{
  # Prints "msg" to stderr and terminates with a non-zero exit status.
  printf "%s\n", msg > "/dev/stderr"
  abort = 1
  exit 1
}