#! /usr/bin/gawk -f
# Last edited on 2002-03-05 01:50:38 by stolfi

# Reads a file containing lines of the form
#
#   SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD HEAD TAG
#   1   2    3    4    5    6    7    8    9     10    11   12   13
#
# Combines consecutive lines with same HEAD, TAG, WORD, FNUM, UNIT, and LINE
# by concatenating the TRAN fields (possibly with repeat digits) and
# ORing their PFRST and PLAST flags.  Sets FPOS and RPOS to "*".
#
# Empty lines are skipped.  Any other line that does not have exactly
# 13 fields is reported on stderr and the program exits with status 1
# without writing the record group that was still pending.

BEGIN {
  # Pending exit status; a negative value means "no error so far".
  abort = -1;
}

# Guard: do not process further records once an error status is recorded.
(abort >= 0) { exit abort; }

# Well-formed record: either merge it into the pending group or start
# a new group.
(NF == 13) {
  sec = $1;   useq = $2;   fnum = $3;  unit = $4;  nlin = $5;
  tran = $6;  fpos = $7;   rpos = $8;  pfrst = $9; plast = $10;
  word = $11; head = $12;  tag = $13;

  # The keys are compared as strings (see same_text): a plain "=="
  # between input fields is numeric when both look like numbers, which
  # would merge records whose keys merely have the same numeric value.
  if (same_text(head, ohead) && same_text(tag, otag) && \
      same_text(word, oword) && same_text(fnum, ofnum) && \
      same_text(unit, ounit) && same_text(nlin, onlin)) {
    # Same group: extend the transcriber string and OR the flags.
    # NOTE(review): the OR yields 1 or 0, so PFRST/PLAST are assumed
    # to be numeric 0/1 flags -- confirm against the producer.
    otran = concat_tran(otran, tran);
    opfrst = (opfrst || pfrst);
    oplast = (oplast || plast);
  } else {
    # New group: flush the previous one (if any) and remember this record.
    # FPOS and RPOS are always replaced by "*", even for unmerged records.
    output_line();
    osec = sec;   ouseq = useq;   ofnum = fnum;   ounit = unit;
    onlin = nlin; otran = tran;
    ofpos = "*";  orpos = "*";
    opfrst = pfrst; oplast = plast;
    oword = word; ohead = head;   otag = tag;
  }
  next;
}

# Any other non-empty line is malformed.
/./ { data_error("bad line type"); }

END {
  # After an error, exit with the error status and skip the final flush.
  if (abort >= 0) { exit abort; }
  output_line();
}

# Returns 1 if A and B are identical as strings, 0 otherwise.
# Concatenating with "" forces a string comparison even when both
# values came from input fields that look like numbers.
function same_text(a, b)
{
  return ((a "") == (b ""));
}

# Appends transcriber code T to the accumulated code string OT.
#
# A trailing run of digits in OT is a repeat count for the character
# just before it (no digits means a count of 1).  If that character
# equals T, the count is incremented instead of appending T again:
#   ""   + "H" -> "H"      "H"  + "H" -> "H2"
#   "H2" + "H" -> "H3"     "H2" + "C" -> "H2C"
# N and OTP are local variables.
function concat_tran(ot, t,    n, otp)
{
  if (ot == "") { return t; }

  if (match(ot, /[0-9]+$/)) {
    # Split OT into the part before the count and the count itself.
    n = substr(ot, RSTART);
    otp = substr(ot, 1, RSTART - 1);
  } else {
    n = 1;
    otp = ot;
  }

  if (substr(otp, length(otp), 1) == t) {
    # Same code as the last one: bump its repeat count.
    return otp (n + 1);
  }
  return ot t;
}

# Prints the pending group, if there is one.  An empty OFNUM means that
# no record has been stored yet.
function output_line()
{
  if (ofnum != "") {
    print osec, ouseq, ofnum, ounit, onlin, otran, \
      ofpos, orpos, opfrst, oplast, oword, ohead, otag;
  }
}

# Reports MSG with the current input line number on stderr, records
# exit status 1 in "abort" and terminates the program.
function data_error(msg)
{
  printf "*** line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}