#! /n/gnu/bin/gawk -f
# Last edited on 2004-05-28 01:11:47 by stolfi

# Reads each line from stdin, maps a selected field through a table,
# and inserts the result as a specified new field.  Usage:
#
#   cat INFILE \
#     | map-field \
#         [ -v inField=FLDNUM ] \
#         [ -v outField=FLDNUM ] \
#         -v table=TBLFILE \
#         [ -v default=DEFSTRING | -v forgiving=BOOL ] \
#     > OUTFILE
#
# The inField and outField are "1" if not specified,
# i.e. the new field is prepended to the original record.
#
# Each line of TBLFILE should have two words OLD NEW specifying
# the mapping from OLD field values to NEW field values.  Table lines
# that start with "#" are ignored, and a third word starting with "#"
# is treated as a trailing comment.
#
# Whenever an input field is not found in the table, if
# "forgiving" is set, leaves it alone; otherwise, the default
# string DEFSTRING is used if not empty; otherwise the script aborts
# with an error message.
#
# Input lines starting with "#" are copied to the output unchanged.
# Empty input lines match no rule and are therefore not copied.
#
# NOTE(review): "default" is a reserved word in gawk versions that
# support the "switch" statement, so this script may fail to parse
# there unless gawk is run in --traditional mode.  Renaming the option
# would change the command-line interface, so it is left as is here;
# confirm against the gawk version in use.
#
# NOTE(review): the tail of printout() and the start of the BEGIN
# block were missing from the copy of this file that was reviewed.
# The parts marked "reconstructed" below were rebuilt from the
# surrounding code and the description above; please compare them
# with a pristine copy if one is available.

function printout(mw, fn,    i)
{
  # Prints $0 with "mw" inserted as field "$(fn)".
  #   mw = the value to insert.
  #   fn = the index (from 1) that "mw" will have in the output record.
  #   i  = local loop index.
  if (NF < fn-1) { error("not enough output fields\n"); }
  if (fn == 1)
    { print mw, $0; }
  else if (fn == NF+1)
    { print $0, mw; }
  else
    {
      # Reconstructed: fields before "fn", then "mw", then the rest.
      for (i = 1; i < fn; i++) { printf "%s%s", $(i), OFS; }
      printf "%s", mw;
      for (i = fn; i <= NF; i++) { printf "%s%s", OFS, $(i); }
      printf "%s", ORS;
    }
}

BEGIN {
  # Reconstructed prologue: "abort" stays negative until an error
  # occurs, and the field indices default to 1 as documented above.
  abort = -1;
  if (inField == "") { inField = 1; }
  if (outField == "") { outField = 1; }
  if (table == "") { arg_error("must specify \"-v table=FILE\""); }

  # Load the OLD -> NEW pairs into "dic".
  split("", dic);
  nMap = 0;
  # Keep the getline status so that a read failure (-1) can be told
  # apart from end-of-file (0) without relying on the initial value
  # of ERRNO.
  while ((status = (getline lin < table)) > 0)
    {
      if (lin !~ /^#/)
        {
          nfld = split(lin, fld, " ");
          # A third word that begins with "#" starts a trailing comment.
          if ((nfld >= 3) && (fld[3] ~ /^#/)) { nfld = 2; }
          if (nfld != 2) error(("bad table entry = \"" lin "\""));
          if (fld[1] in dic) error(("repeated key = \"" lin "\""));
          dic[fld[1]] = fld[2];
          nMap++;
        }
    }
  if (status < 0) { error((table ": " ERRNO)); }
  close (table);
  if (nMap == 0)
    { arg_error(("file \"" table "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", nMap > "/dev/stderr"
}

# Stop reading input as soon as an error has been flagged.
(abort >= 0) { exit abort; }

# Comment lines pass through untouched.
/^#/ { print; next; }

# Non-empty data lines: look up field "inField" and insert the result.
/./ {
  if (NF < inField) { error("not enough input fields\n"); }
  x = $(inField);
  if (x in dic)
    { y = dic[x]; }
  else
    {
      if (forgiving)
        { y = x; }
      else if (default != "")
        { y = default; }
      else
        { error(("key \"" x "\" not in table\n")); }
    }
  printout(y, outField);
  next;
}

function error(msg)
{
  # Reports "msg" on stderr, prefixed with the current input line
  # number, then terminates the script with exit status 1.
  printf "line %d: %s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit 1
}

function arg_error(msg)
{
  # Reports a command-line argument problem on stderr, then
  # terminates the script with exit status 1.
  printf "%s\n", msg >> "/dev/stderr";
  abort = 1;
  exit 1
}