#! /usr/bin/gawk -f
# Last edited on 1998-12-29 15:35:00 by stolfi

# usage: compute-consensus < TUPLECTS > TUPLEMAP
#
# Reads a file whose lines have the form COUNT TUPLE, where COUNT is
# an integer and TUPLE is a string of 26 characters representing the
# readings of one VMS character position by 26 potential
# transcribers ("A" thru "Z"). In a tuple, "%" denotes "no
# information" and "*" denotes "unreadable".
#
# Writes a file of the form COUNT TUPLE CONS where COUNT and TUPLE
# are as in the input, and CONS is the consensus reading for that
# tuple.
#
# Let's say that a reading is "significant" if it is not "%". If all
# significant readings are equal to the same character C (including
# "!"), the consensus reading is C; otherwise, if all significant
# readings are either "!", ".", or ",", the consensus reading is
# ","; otherwise it is "*". A tuple with no significant reading at
# all gets the consensus "%".
#
# Empty input lines are skipped silently. Any other line that does not
# consist of exactly two fields, or whose TUPLE is not 26 characters
# long, aborts the run with exit status 1 and a message on stderr.

BEGIN {
  abort = -1;
  usage = "compute-consensus < TUPLECTS > TUPLEMAP";
}

# Main rule: one output line per non-empty input line.
/./ {
  if (NF != 2) { format_error("wrong num of fields"); }
  ct = $1;
  tp = $2;
  if (length(tp) != 26) { format_error("bad tuple length"); }
  printf "%7d %s %s\n", ct, tp, consensus(tp);
  next;
}

# Returns true iff reading C is one of the "space or empty" codes
# ("," "." "!"), which may be merged into an uncertain space.
function is_spacelike(c)
{
  return ((c == ",") || (c == ".") || (c == "!"));
}

# Returns the consensus reading for the tuple TP (one character per
# transcriber). The names after the extra spaces in the parameter list
# are local variables, per the usual awk convention.
function consensus(tp,   i, n, c, cons)
{
  cons = "%";
  n = length(tp);
  for (i = 1; i <= n; i++) {
    c = substr(tp, i, 1);
    # Non-significant readings and readings that agree with the
    # current consensus change nothing.
    if ((c == "%") || (c == cons)) { continue; }
    if (cons == "%") {
      # First significant reading.
      cons = c;
    } else if (is_spacelike(cons) && is_spacelike(c)) {
      # Significant readings differ but are all space or empty:
      # say that the consensus is "uncertain space".
      cons = ",";
    } else {
      # No consensus; later readings cannot change that.
      return "*";
    }
  }
  return cons;
}

# Reports a command-line error plus the usage string, then aborts.
function arg_error(msg)
{
  printf "*** %s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports an error in an auxiliary table at the current file/line, then aborts.
function table_error(msg)
{
  printf "file %s, line %d: *** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports a malformed input line at the current file/line, then aborts.
function format_error(msg)
{
  printf "file %s, line %d: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports an internal inconsistency, then aborts.
function program_error(msg)
{
  printf "*** %s\n", msg > "/dev/stderr";
  abort = 1;
  exit abort;
}