#! /usr/bin/gawk -f
# Last edited on 1999-02-01 06:44:11 by stolfi

# Reads each line from stdin, maps fields through a table,
# replacing the previous field values.  Usage
#
#   cat INFILE \
#     | map-field \
#         -v table=TBLFILE \
#         [ -v fields=FIELDLIST ] \
#         [ -v default=DEFSTRING | -v forgiving=BOOL ] \
#     > OUTFILE
#
# Each line of TBLFILE should have two words OLD NEW specifying
# the mapping from OLD field values to NEW field values.
# Lines of TBLFILE that begin with "#" are ignored.
#
# FIELDLIST should be a list of field indices (counting from 1)
# separated by commas.  If not specified, all fields are mapped.
#
# Whenever an input field is not found in the table, if
# "forgiving" is set, leaves it alone; otherwise, the default
# string DEFSTRING is used if not empty; otherwise the script aborts
# with an error message and exit status 1.
#
# Input lines that begin with "#" are copied unchanged.  Empty input
# lines match neither rule below and are therefore not copied.
#
# NOTE(review): "default" is a reserved word (switch/case/default) in
# gawk 4.0 and later, so this program and "-v default=..." are expected
# to be rejected there.  The name is kept because renaming it would
# change the command-line interface -- confirm the target gawk version.

# Prints MSG on stderr and terminates the script with exit status 1.
# Once input reading has started (NR > 0) the message is prefixed with
# the current input line number.
function error(msg)
{
  if (NR > 0) { msg = ("line " NR ": " msg); }
  printf "%s\n", msg > "/dev/stderr";
  exit 1;
}

# === ACTIONS ===================================================

BEGIN {
  if (table == "") { error("must specify \"-v table=FILE\""); }

  if ((forgiving != "") && (default != ""))
    { error("can't specify \"default\" with \"forgiving\""); }
  else if ((forgiving == "") && (default == ""))
    { forgiving = 0; }

  # Parse the optional field list.  Each entry must be a positive
  # integer (blanks around it are tolerated); anything else would make
  # $(...) address $0 or raise a gawk fatal error later on.
  if (fields != "")
    { nfields = split(fields, fnum, /,/);
      for (i = 1; i <= nfields; i++)
        { if ((fnum[i] !~ /^[ \t]*[0-9]+[ \t]*$/) || ((fnum[i] + 0) < 1))
            { error(("bad field index = \"" fnum[i] "\"")); }
          fnum[i] = fnum[i] + 0;
        }
    }
  else
    { nfields = 0; }

  # Load the OLD -> NEW pairs into dic[].
  split("", dic);
  nMap = 0;
  # getline returns 1 per line read, 0 at end of file and -1 on error;
  # keep the last status so a read/open failure can be told from EOF.
  while ((status = (getline lin < table)) > 0)
    { if (lin !~ /^[#]/)
        { nw = split(lin, fld, " ");
          if (nw != 2) { error(("bad table entry = \"" lin "\"")); }
          if (fld[1] in dic) { error(("repeated key = \"" lin "\"")); }
          dic[fld[1]] = fld[2];
          nMap++;
        }
    }
  if (status < 0) { error((table ": " ERRNO)); }
  close(table);
  if (nMap == 0) { error(("file \"" table "\" empty or missing")); }
  # printf "loaded %6d map pairs\n", nMap > "/dev/stderr"
}

# Comment lines are passed through untouched.
/^#/ { print; next; }

# Every other non-empty line: map either all fields or the listed ones.
/./ {
  n = (nfields == 0 ? NF : nfields);
  for (i = 1; i <= n; i++)
    { # k is the index of the field to map in this iteration.
      k = (nfields == 0 ? i : fnum[i]);
      x = $(k);
      if (x in dic)
        { y = dic[x]; }
      else if (forgiving)
        { y = x; }
      else if (default != "")
        { y = default; }
      else
        { error(("key \"" x "\" not in table")); }
      $(k) = y;
    }
  print;
  next;
}