#! /usr/bin/gawk -f
# Last edited on 1998-12-30 11:36:14 by stolfi

# This script relies on helper functions that are not defined here
# (tup_clear_current_batch, tup_process_current_batch, tup_extract_tuples);
# per the usage string they are loaded with "-f tuple-procs.gawk".

BEGIN {
  # `abort' is negative while everything is fine; once an error helper
  # sets it to a non-negative value, every rule (and END) exits with it.
  abort = -1;
  usage = ( \
    "extract-reading-tuples \\\n" \
    " -f tuple-procs.gawk \\\n" \
    " < INFILE.evt > OUTFILE.tup " \
  );

  # Reads an interlinear file in EVMT format (EVA encoding) and writes
  # a list of 26-character tuples, one for each character position
  # present in the interlinear.
  #
  # The tuple for a given character position consists of the readings
  # of that position by all 26 (potential) transcribers, "A" thru "Z".
  # The "%" reading is assumed whenever a character position is not
  # covered by a particular transcription.
  #
  # The spaces, line breaks, para breaks, and the fillers "!" and "%"
  # are viewed as readings, too. In-line comments are replaced by "!"
  # fillers, preserving alignment.

  tup_clear_current_batch();

  # Various counts:
  n_lines = 0;     # VMS text lines.
  n_variants = 0;  # Interlinear text lines read.
  n_used = 0;      # Interlinear text lines used in tuples.
  n_tuples = 0;    # Tuples written
}

# Runs for every record: stop as soon as an error has been flagged.
//{
  if (abort >= 0) { exit abort; }
}

# Blank line
/^ *$/ {
  next;
}

# `##'-comment (page/unit header)
/^[#][#]/ {
  # A header ends the batch of lines accumulated so far.
  tup_process_current_batch("");
  next;
}

# Other `#'-comment
/^[#]/ {
  next;
}

# Uncommented page/unit header
/^<[^<>;]*>/ {
  # A header ends the batch of lines accumulated so far.
  tup_process_current_batch("");
  next;
}

# Text line
/^</ {
  # NOTE(review): the action of this rule was lost when the file was
  # mangled (the span from "</" up to the ">" of the END block's
  # "abort >= 0" test was stripped as if it were markup).  It presumably
  # handed the line to a tuple-procs.gawk helper that adds it to the
  # current batch -- TODO restore it from a pristine copy.  Until then,
  # fail loudly instead of silently producing an empty tuple list.
  fatal_error("text-line rule body is missing from this script");
}

END {
  # Propagate the exit status if we got here through an error `exit'.
  if (abort >= 0) { exit abort; }
  # Flush whatever batch is still pending at end of input.
  tup_process_current_batch("");
  printf "%7d VMS text lines found\n", n_lines > "/dev/stderr";
  printf "%7d interlinear text lines read\n", n_variants > "/dev/stderr";
  printf "%7d interlinear text lines used\n", n_used > "/dev/stderr";
  printf "%7d tuples written\n", n_tuples > "/dev/stderr";
}

# Client functions called by tup_process_current_batch:

function process_batch_texts(loc,txt,trn,nv,nc,   tuple,j)
{
  # Called with the cleaned-up texts, without comments.
  # Extract the tuples and print them.
  #
  # `txt', `trn', `nv' and `nc' are passed unchanged to tup_extract_tuples,
  # which fills `tuple'; entries 1..nc of it are then printed, one per line.
  # `loc' is not used here.  `tuple' and `j' are local variables.
  # (Presumably `nv' is the number of variant texts and `nc' the number of
  # character positions in the batch -- confirm against tuple-procs.gawk.)

  split("", tuple);  # Make sure `tuple' starts out as an empty array.
  tup_extract_tuples(txt,trn,nv,nc,tuple);
  for (j=1; j<=nc; j++) {
    print tuple[j];
    n_tuples++;
  }
  split("", tuple);  # Release the tuple strings.
  n_used += nv;
  n_lines ++;        # Each batch counts as one VMS text line.
}

function process_batch_lines(batch,nb)
{
  # Called with the original (and new) lines, including comments.
  # Ignore them:
}

function arg_error(msg)
{
  # Reports a command-line error plus the usage text on stderr, then
  # exits with status 1 (through the `abort' check in END).
  printf "*** %s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}

function fatal_error(msg)
{
  # Reports an error at the current input position on stderr, then
  # exits with status 1 (through the `abort' check in END).
  printf "file %s, line %d: *** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}

function format_error(msg)
{
  # Reports a non-fatal problem at the current input position on stderr.
  printf "file %s, line %d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
}

function print_line(lin)
{
  # Echoes `lin' on stderr, tagged with the current input position and
  # followed by a blank line.
  printf "file %s, line %d: %s\n", FILENAME, FNR, lin > "/dev/stderr";
  printf "\n" > "/dev/stderr";
}