# To be included in extract-reading-tuples, combine-versions
# Last edited on 1998-12-30 10:58:00 by stolfi

# NOTE(review): this copy of the file arrived with its line breaks
# collapsed and with some spans between "<" and ">" missing.  Line and
# comment boundaries below were re-inferred from the text.  Places where
# code was lost and could not be recovered are marked "NOTE(review)".

function tup_clear_current_batch()
{
  # Empties the buffer that contains the current batch of versions
  # for a given line.  Resets these globals:
  #
  #   tup_cur_loc  location of current batch (including line number);
  #   tup_nb       number of input lines in this batch (incl. comments);
  #   tup_batch    "tup_batch[i]", "i=0..tup_nb-1" are the lines of the batch.

  tup_cur_loc = "";
  tup_nb = 0;
  split("", tup_batch);   # portable idiom for emptying an array
}

function tup_append_line_to_batch(lin)
{
  # Appends the line "lin" to the current batch, without checking anything.

  tup_batch[tup_nb] = lin;
  tup_nb++;
}

function tup_append_version_to_batch(lin,   k)
{
  # Appends "lin" after the last non-comment line of the current batch,
  # i.e. before any run of "#" lines that ends the batch.
  # "k" is a local variable.

  k = tup_nb;
  # Shift the trailing comment lines down by one slot:
  while ((k > 0) && (tup_batch[k-1] ~ /^#/))
    { tup_batch[k] = tup_batch[k-1]; k--; }
  tup_batch[k] = lin;
  tup_nb++;
}

function tup_prepend_version_to_batch(lin,   i,k)
{
  # Inserts "lin" before the first non-comment line of the current batch,
  # i.e. after any run of "#" lines that starts the batch.
  # "i" and "k" are local variables.
  #
  # NOTE(review): in the reviewed copy this body read "while ((ii) {";
  # the text between "i <" and "> i" had been lost.  The two loops below
  # were rebuilt from the comment above, from the surviving fragments,
  # and from tup_append_version_to_batch -- confirm against a pristine copy.

  # Skip the leading comment lines:
  i = 0;
  while ((i < tup_nb) && (tup_batch[i] ~ /^#/)) { i++; }
  # Shift lines "i..tup_nb-1" down by one slot:
  k = tup_nb;
  while (k > i)
    { tup_batch[k] = tup_batch[k-1]; k--; }
  tup_batch[i] = lin;
  tup_nb++;
}

function tup_process_variant(lin,ignore,   loc,tmp)
{
  # Processes a new variant line "lin": checks its layout, flushes the
  # current batch (through tup_process_current_batch(ignore)) if "lin"
  # belongs to a different location, and then appends "lin" to the batch.
  #
  # Layout enforced below: the location code starts in column 1, is
  # followed by blanks through column 19, and the text starts (with a
  # non-blank) in column 20.  "loc" and "tmp" are local variables.

  if (length(lin) <= 19) { fatal_error("missing text"); }

  # Check general format, and extract location code and text proper.
  # Note that line number must start with digit,
  # while the text unit code must start with letter:
  #
  # NOTE(review): the pattern below is damaged -- the part between its
  # opening "<" and closing ">" was lost, leaving only "^".  Judging by
  # the code that follows, it matched a location code enclosed in one
  # delimiter character on each side and ending in ";" plus one
  # character.  Restore it from a pristine copy; as written, "loc" comes
  # out empty.
  match(lin, /^/);
  if (RSTART != 1)
    { format_error("bad location format");
      # NOTE(review): "res" is not a local of this function, so this
      # assigns a global; kept as found in case a client reads it.
      res = 0;
    }
  else
    { # Drop the first and last characters of the matched location code:
      loc = substr(lin,RSTART+1,RLENGTH-2);
      if (substr(lin,RLENGTH+1, 19-RLENGTH) !~ /^[ ]*$/)
        { fatal_error("too few blanks"); }
      if (substr(lin,20,1) == " ")
        { fatal_error("too many blanks"); }

      # Validate location code:
      # the last two characters must be ";" and one more character
      # (read as the transcriber code by tup_process_current_batch);
      # strip them, so that "loc" identifies the line only.
      tmp = length(loc);
      if (substr(loc, tmp-1, 1) != ";")
        { fatal_error("program error: semicolon"); }
      loc = substr(loc, 1, tmp-2);

      # If new batch, flush previous one:
      if (loc != tup_cur_loc)
        { tup_process_current_batch(ignore); }

      # Save version in batch buffer:
      tup_append_line_to_batch(lin);

      # Update current location:
      tup_cur_loc = loc;
    }
}

function tup_process_current_batch(ignore, \
    nv,txt,trn,trseen,len,nc,i,loc,lin,tx,trc,tk)
{
  # Processes a batch of variants and comments for one VMS line,
  # generating tuples.  Calls the client functions
  # process_batch_texts (only if at least one variant was kept)
  # and process_batch_lines, and then empties the batch.
  #
  # "ignore" is a string of transcriber code letters; variants whose
  # code occurs in it are left out of "txt" and "trn".
  # All other parameters are local variables ("tk" used to leak into
  # the global namespace and is now declared here).

  nv = 0;
  split("", txt);    # "txt[i]", "i=0..nv-1" are the variant texts.
  split("", trn);    # "trn[i]" the corresponding transcriber codes, 1..26.
  split("", trseen); # "trseen[tr] = 1" means transcriber "tr" has occurred.
  nc = -1;           # length of text in lines of current batch.

  # NOTE(review): the head of the loop below is damaged -- everything
  # from a "<" (presumably the loop bound) to a ">" inside the bracket
  # expression of a match() pattern was lost.  The missing code must
  # have fetched "lin" from the batch and set "tx" (and presumably "nc")
  # before the surviving part.  The fragment is kept exactly as found
  # and does NOT parse; restore it from a pristine copy.
  for(i=0; i]/))
        { fatal_error("program error: tr code"); }
      # The transcriber code is the character after the match start:
      trc = substr(lin, RSTART+1, 1);
      if (trc in trseen)
        { fatal_error("repeated transcription code");
          # NOTE(review): "res" is not a local of this function, so
          # this assigns a global; kept as found.
          res = 0;
        }
      else if (index(ignore, trc) == 0)
        { trseen[trc] = 1;
          # Save for later tuple extraction:
          txt[nv] = tx;
          # Map the code letter "A".."Z" to 1..26:
          tk = index("ABCDEFGHIJKLMNOPQRSTUVWXYZ", trc);
          if (tk == 0) { fatal_error("program error: trc"); }
          trn[nv] = tk;
          nv++;
        }
    }
  }

  if (nv > 0)
    { # Call client function to process VMS texts
      process_batch_texts(tup_cur_loc,txt,trn,nv,nc);
    }

  # Call client function to dispose of batch lines:
  process_batch_lines(tup_batch, tup_nb);
  tup_clear_current_batch();
}

function tup_remove_comments(txt,   chunk,i,res)
{
  # Replaces {}-comments by an equal length of "!"s, and returns the
  # resulting string.  An unmatched "{" is reported through format_error
  # and the rest of the text is then copied unchanged.
  #
  # Uses the global string "tup_bangs" as a cache of "!"s, grown on demand.
  # "chunk" (unused), "i" and "res" are local variables.

  res = "";
  while (txt != "")
    { i = index(txt, "{");
      if (i == 0)
        { # No more comments; copy the rest.
          res = (res txt); txt = "";
        }
      else
        { # Copy the text before the "{" and look for the matching "}":
          res = (res substr(txt, 1, i-1));
          txt = substr(txt, i);
          i = index(txt, "}");
          if (i == 0)
            { # This call used to be spelled "format-error", which awk
              # reads as a subtraction involving an undefined function
              # "error".
              format_error("mismatched `{'");
              res = (res txt); txt = "";
            }
          else
            { # Make sure the cache holds at least "i" bangs:
              while (length(tup_bangs) < i)
                { tup_bangs = ( tup_bangs tup_bangs "!"); }
              # Mask the comment, delimiters included:
              res = (res substr(tup_bangs, 1,i));
              txt = substr(txt, i+1);
            }
        }
    }
  return res;
}

function tup_extract_tuples(txt,trn,nv,nc,tuple, i,j,k,d,tup,r)
{
  # For each character position "j=1..nc", creates a tuple
  # "tuple[j]" from "txt[i]", "i=0..nv-1".

  # NOTE(review): the text available for review stops in the middle of
  # the "for" statement below; the fragment is carried over unchanged.

  # Check lengths, just to be sure:
  nc = length(txt[0]);
  for (i=1; i