#! /usr/bin/gawk -f
# Last edited on 1999-07-28 01:43:47 by stolfi

BEGIN {
  abort = -1;
  usage = "diff-dot-product ORG POS VEC";
  #
  # where ORG, POS and VEC are names of files, and each line in them
  # has the format COORD LABEL, where COORD is a real number and LABEL
  # is any word.  (The LABELs must be sorted and must match in all
  # three files.)  The COORD field of the ith line is interpreted as
  # the ith coordinate of a vector in some high-dimensional space.
  #
  # After reading all three files, prints to stdout
  # a single number, the sum of (POS[i] - ORG[i])*VEC[i]
  # for all coordinates of the three vectors.
  #
  # On any error (bad arguments, unreadable file, malformed or
  # mismatched line, POS or VEC shorter than ORG) a message is
  # written to stderr and the exit status is 1.

  if (ARGC != 4) { error(("ARGC = " ARGC " - usage: " usage)); }
  org = ARGV[1]; if (org == "") { error(("usage: " usage)); }
  pos = ARGV[2]; if (pos == "") { error(("usage: " usage)); }
  vec = ARGV[3]; if (vec == "") { error(("usage: " usage)); }

  N = 0;      # Number of ORG lines read so far.
  prod = 0;   # Running sum of (POS[i] - ORG[i])*VEC[i].

  # The status returned by getline (>0 record read, 0 end of file,
  # <0 error) is what decides success.  ERRNO is only consulted for
  # the message text after a failure: it is a descriptive string, is
  # not reset by successful reads, and its "no error" value is not "0".
  while ((status = (getline < org)) > 0) {
    N++;
    if (NF != 2) { error((org ", line " N ": bad format")); }
    # Save the ORG fields now; the next getline overwrites $0.
    oi = $1;
    w = $2;
    pi = read_coord(pos, N, w);
    vi = read_coord(vec, N, w);
    prod += (pi - oi)*vi;
  }
  if (status < 0) { error((org ": " ERRNO)); }

  close(org);
  close(pos);
  close(vec);

  # Note: no trailing newline is printed.
  printf "%+8.5f", prod;
}

# Reads the next line of FILE, which must be line number LINENO and
# must have the format COORD LABEL with the given LABEL.  Returns the
# COORD field.  Calls error() (which does not return) on a read error,
# on premature end of file, or on a malformed or mismatched line.
# The extra parameter STATUS is a local variable.
function read_coord(file, lineno, label,    status)
{
  status = (getline < file);
  if (status < 0) { error((file ": " ERRNO)); }
  # At end of file getline leaves $0 untouched, so without this test
  # the stale ORG record would be accepted as if it came from FILE.
  if (status == 0) { error((file ", line " lineno ": unexpected end of file")); }
  if ((NF != 2) || (label != $2)) { error((file ", line " lineno ": bad format")); }
  return $1;
}

# Prints MSG to stderr, sets the global abort flag, and terminates
# the program with exit status 1.
function error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit(1);
}