#! /n/gnu/bin/gawk -f
# Last edited on 1998-07-27 02:07:42 by stolfi

BEGIN {
  usage = ( \
    "tabulate-triple-counts \\\n" \
    " -v which=[P|M|S] \\\n" \
    " -v percent=[0|1] \\\n" \
    " -v prefs=\"P1 P2...\" \\\n" \
    " -v midfs=\"M1 M2...\" \\\n" \
    " -v suffs=\"S1 S2...\" \\\n" \
    " < TRCOUNTS > TABLES" \
  );

  # The records of TRCOUNTS must have the form
  #
  #   COUNT P,M,S
  #
  # that is, two whitespace-separated fields: an integer COUNT and a
  # comma-separated triple of strings P (prefix), M (middle), S (suffix).
  #
  # One of the three strings, selected by the "which" variable
  # ("P", "M" or "S"), is called the "slice"; the other two are the
  # "left" and "right" strings.  The file must be sorted by slice.
  #
  # This script prints one table for each slice K,
  # where element [I,J] is the count of triples
  # with slice = K, left = I and right = J.
  #
  # If "percent" is true, prints the percentage of the
  # count relative to the table total, instead of the count
  # itself.

  # "abort" is the pending exit status; a negative value means
  # "no error so far".  It must start negative: the record rule
  # exits whenever "abort >= 0", so an initial 0 would terminate
  # the script on the very first input record.
  abort = -1;

  # K = field index of the slice, I = index of the left string,
  # J = index of the right string (1 = prefix, 2 = middle, 3 = suffix).
  if (which == "P") {
    K = 1; I = 2; J = 3;
  } else if (which == "M") {
    K = 2; I = 1; J = 3;
  } else if (which == "S") {
    K = 3; I = 1; J = 2;
  } else {
    error("bad \"which\"");
  }

  # Below "r" is a field index in a triple (1, 2, or 3).
  # The string "lab[r,s]" is the "s"th alternative for field "r".
  # The count "tot[w]" is the count of triples in the current slice.
  # The count "nlab[r]" is the number of entries in "lab[r,*]".

  # "split" with an empty string is the portable way to clear an array.
  split("", lab);
  split("", tot);
  split("", nlab);

  # Preload the label tables with the user-supplied orderings.
  getlabs(prefs, 1, lab, nlab);
  getlabs(midfs, 2, lab, nlab);
  getlabs(suffs, 3, lab, nlab);
}

function getlabs(str, r, lab, nlab,   fld,s,n)
{
  # Splits "str", a whitespace-separated list of strings,
  # stores the "s"th element in "lab[r,s]",
  # and stores the number of elements in "nlab[r]".
  # "fld", "s" and "n" are local work variables.
  n = split(str, fld);
  nlab[r] = n;
  for (s = 1; s <= n; s++) { lab[r,s] = fld[s]; }
}

/./ {
  # Bail out if an error was already recorded.
  if (abort >= 0) { exit abort; }

  # Each record is "COUNT P,M,S".
  if (NF != 2) { error(("line " NR ": bad record format")); }
  nf = split($2, fld, ",");
  if (nf != 3) { error(("line " NR ": bad triple format")); }

  # Make sure every component of the triple has a label slot.
  checklab(fld[1], 1, lab, nlab);
  checklab(fld[2], 2, lab, nlab);
  checklab(fld[3], 3, lab, nlab);

  # NOTE(review): the rest of this rule (accumulating the count $1
  # for the current slice and printing each finished table) is not
  # present in this copy of the file -- restore it from the original
  # before relying on this script.
}

function checklab(str, r, lab, nlab,   fld,s,n)
{
  # Appends "str" to "lab[r,*]" (incrementing "nlab[r]") if not yet there.
  # "fld", "s" and "n" are local work variables ("fld" is unused and kept
  # only so the parameter list stays as it was).
  n = nlab[r] + 0;
  for (s = 1; s <= n; s++) {
    # Concatenating "" forces a string comparison, so that labels
    # which look like numbers ("01" vs "1") are kept distinct.
    if ((lab[r,s] "") == (str "")) { return; }
  }
  n++;
  lab[r,n] = str;
  nlab[r] = n;
}

function error(msg)
{
  # Prints "msg" to standard error, records the failure status in
  # "abort", and terminates the script with exit status 1.
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1;
}