#! /usr/bin/gawk -f
# Last edited on 1998-12-27 11:02:26 by stolfi

# Reads an EVMT file from stdin and writes to stdout all text lines
# (and the #-comments attached to them, if any), each prefixed with
# three zero-padded fields "%06d %02d %03d ": the reading order of the
# line's locator, the transcriber number, and a serial number.
#
# The transcriber code is mapped to its 1-based position in the
# "trcodes" string (01..26 for the default string; 00 if the code
# does not occur in it, and 00 for comment lines).
#
# Comments are buffered and printed together with the data lines of the
# locator group they precede.  Blank lines and empty "#" comments are
# dropped.  Comments that follow the last data line of a group are
# carried over to the next group.
#
# Each non-comment line of TBLFILE must have exactly two words
# LOC ORDER, giving the ORDER assigned to locator LOC.  The locator
# must not include the "<>" delimiters nor the transcriber code.
#
# Whenever an input locator is not found in the table, a message is
# printed to stderr.
#
# NOTE(review): the copy of this script under review had lost some
# text (apparently everything between certain "<" and ">" characters).
# The locator regexp in the text-line rule, the tail of flush() and the
# head of format_error() are reconstructions; each is marked below and
# should be checked against a pristine copy.

BEGIN {
  abort = -1;

  # Usage summary.  Note that arg_error() prints only its message.
  usage = ( \
    "cat INFILE \\\n" \
    " | attach-reading-order \\\n" \
    " -v table=TBLFILE \\\n" \
    " [ -v trcodes=CHARS ] \\\n" \
    " > OUTFILE\n" \
  );

  if (table == "") { arg_error("must specify \"-v table=FILE\"\n"); }
  if (trcodes == "") { trcodes = "HCDFGTJILMKQRNPZABEOSWXYVU"; }

  # Buffer state.  nv must be numeric from the start: an unset variable
  # used as a subscript is "" rather than 0, which would misfile a
  # comment that precedes the first header.
  nv = 0;
  cur_loc = "";

  # Load the locator -> order table.  The record variable must not be
  # called "lin": that name is the line-buffer array, and gawk does not
  # allow one name to be both a scalar and an array.
  split("", dic);
  nMap = 0;
  while ((getline tblrec < table) > 0) {
    if (tblrec !~ /^[#]/) {
      nfld = split(tblrec, fld);
      if (nfld != 2) { arg_error(("bad table entry = \"" tblrec "\"")); }
      if (fld[1] in dic) { arg_error(("repeated key = \"" tblrec "\"")); }
      dic[fld[1]] = fld[2];
      nMap++;
    }
  }
  close(table);
  if (nMap == 0) { arg_error(("file \"" table "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", nMap > "/dev/stderr"
}

# Stop processing as soon as an error has been flagged.
// { if (abort >= 0) { exit abort; } }

/^##/ {
  # Neutralized page/unit header: close the current group and
  # discard any buffered comments.
  flush();
  nv = 0;
  next;
}

/^<[^[<>;]*> *(|{[^{}]*} *)$/ {
  # Unprotected page/unit header: same treatment.
  flush();
  nv = 0;
  next;
}

/^ *$/ {
  # Blank line: ignored.
  next;
}

/^# *$/ {
  # Blank comment: ignored.
  next;
}

/^#/ {
  # Comment: buffer it with transcriber number 0.
  lin[nv] = $0;
  trc[nv] = 0;
  nv++;
  next;
}

/^[<]/ {
  # Text line.
  # NOTE(review): reconstructed regexp.  The offsets used below imply a
  # locator of the form "<LOC;T>" with a one-character transcriber
  # code T; "[A-Z]" is assumed from the default trcodes string.
  if (match($0, /^<[^<>;]*;[A-Z]>/)) {
    # Strip the leading "<" and the trailing ";T>".
    loc = substr($0, RSTART+1, RLENGTH-4);
    # The transcriber code is the character just before the ">".
    tr = substr($0, RLENGTH-1, 1);
    if (loc != cur_loc) { flush(); }
    lin[nv] = $0;
    trc[nv] = index(trcodes, tr);
    nv++;
    cur_loc = loc;
  } else {
    format_error("bad locator");
  }
  next;
}

// { format_error("unrecognized format"); }

END {
  if (abort >= 0) { exit abort; }
  flush();
}

# Writes out the buffered lines of the current locator group, if any.
# Uses the globals cur_loc, dic, lin, trc and nv.  Comments buffered
# after the last data line are kept for the next group.
function flush(    i, k, ord)
{
  if (cur_loc == "") { return; }

  if (cur_loc in dic) {
    ord = dic[cur_loc];
  } else {
    format_error("locator not in table");
    ord = 0;
  }

  # Locate last data line: k is the number of buffered lines up to and
  # including it; lines k..nv-1 are trailing comments.
  for (k = nv; (k > 0) && (substr(lin[k-1], 1, 1) == "#"); k--) { }

  # Print a "#" separator, then the data lines and preceding comments.
  printf "%06d %02d %03d %s\n", ord, 0, 0, "#";

  # NOTE(review): everything from here to the end of this function is
  # reconstructed; the serial number "i+1" in particular is a guess.
  for (i = 0; i < k; i++) {
    printf "%06d %02d %03d %s\n", ord, trc[i], i+1, lin[i];
  }

  # Move the trailing comments to the front of the buffer.
  for (i = k; i < nv; i++) {
    lin[i-k] = lin[i];
    trc[i-k] = trc[i];
  }
  nv -= k;

  # Mark the group as closed, so that a second flush() before the next
  # text line (e.g. after a page header) does not print an empty group.
  cur_loc = "";
}

# Reports an error in the input file and aborts with status 1.
# NOTE(review): the head of this function and the start of its printf
# are reconstructed; the message format is a guess.
function format_error(msg)
{
  printf "line %d: %s\n", NR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

# Reports an error in the arguments or table file; aborts with status 1.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1;
}