#! /usr/bin/gawk -f

# Reads each line from stdin, maps a selected field through a table,
# and inserts the result as a specified new field.  Usage:
#
#   cat INFILE \
#     | map-field \
#         [ -v inField=FLDNUM ] \
#         [ -v outField=FLDNUM ] \
#         -v table=TBLFILE \
#         [ -v default=DEFSTRING ] \
#     > OUTFILE
#
# The inField and outField both default to "1" if not specified,
# i.e. the first field is mapped and the new field is prepended
# to the original record.
#
# Each line of TBLFILE should have two words OLD NEW specifying
# the mapping from OLD field values to NEW field values.
#
# Whenever an input field is not found in the table, the default
# string DEFSTRING is used if not empty, otherwise the script aborts
# with an error message.
#
# Input lines that begin with "#" are copied to the output unchanged.
# Empty input lines match no rule and are therefore dropped.
#
# NOTE(review): "default" appears to be a reserved word in gawk builds
# that support the "switch" statement, in which case neither
# "-v default=..." nor the references below would be accepted.  The name
# is kept because it is this script's documented interface; confirm
# against the target gawk (it may need --traditional, or a renamed
# option together with its callers).

# Reports "msg" on stderr, tagged with the current input record number
# (0 while still inside BEGIN), and terminates with a non-zero status.
# Also sets the global "abort" flag that the main rules test.
function error(msg)
{
  # Several callers end their message with "\n"; strip it so that the
  # report is always exactly one line.
  sub(/\n+$/, "", msg);
  printf "line %d: %s\n", NR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

# Prints $0 with "mw" inserted as field number "fn", so that the old
# fields fn..NF are shifted one position to the right.  "fn" may range
# from 1 (prepend) to NF+1 (append).  "i" is a local variable.
function printout(mw, fn,    i)
{
  if (NF < fn-1) { error("not enough output fields\n"); }
  if (fn == 1)
    { print mw, $0; }
  else if (fn == NF+1)
    { print $0, mw; }
  else
    { # NOTE(review): this branch was truncated in the copy under review
      # and is reconstructed from the contract stated above -- confirm.
      for (i = 1; i < fn; i++) { printf "%s%s", $(i), OFS; }
      printf "%s", mw;
      for (i = fn; i <= NF; i++) { printf "%s%s", OFS, $(i); }
      printf "%s", ORS;
    }
}

BEGIN {
  # NOTE(review): everything in this block up to the "getline" loop header
  # was lost in the copy under review; it is reconstructed from the usage
  # notes at the top of the file -- confirm against a pristine copy.
  abort = 0;
  if (inField == "") { inField = 1; }
  if (outField == "") { outField = 1; }
  if (table == "") { error("must define \"table\""); }

  # Load the OLD -> NEW pairs into "dic".
  nMap = 0;
  while ((status = (getline lin < table)) > 0)
    { split(lin, fld);
      # Every table line must consist of exactly two words.
      if ((3 in fld) || ! (2 in fld)) error("bad table entry = \"" lin "\"");
      if (fld[1] in dic) error("repeated key = \"" lin "\"");
      dic[fld[1]] = fld[2];
      nMap++;
    }
  # getline yields -1 when the file cannot be read; without this check
  # an unreadable table would silently behave like an empty one.
  if (status < 0) { error("cannot read table file \"" table "\""); }
  close (table);
  printf "loaded %6d map pairs\n", nMap > "/dev/stderr"
}

# Comment lines are passed through untouched.
/^#/ {
  if (abort) exit 1;
  print;
  next;
}

# Any other non-empty line: look up field "inField" in the table and
# emit the record with the mapped value inserted as field "outField".
/./ {
  if (abort) exit 1;
  if (NF < inField) { error("not enough input fields\n"); }
  x = $(inField);
  if (x in dic)
    { y = dic[x]; }
  else
    { if (default=="")
        { error("old key not in table\n"); }
      else
        { y = default; }
    }
  printout(y, outField);
  next;
}