#! /n/gnu/bin/gawk -f
# Last edited on 1998-07-12 15:00:34 by stolfi

# Colorizes the output of "extract-signif-chars" by a tuple-indexed table.
#
# NOTE(review): the copy of this script that was reviewed had lost its
# line breaks, and every stretch of text from a "<" followed by a letter
# up to the next ">" had been stripped (as if by an HTML tag remover).
# Code tagged "RESTORED" below is fixed by the surviving context.  Code
# tagged "RECONSTRUCTED" is a best guess that merely fits the surviving
# call sites; verify it against a pristine copy before trusting output.
#
# NOTE(review): the option formerly spelled "-v default=DEFDIGS" is now
# "-v defdigs=DEFDIGS".  "default" is a reserved word in gawk 4.0 and
# later, so the old spelling can neither be parsed in the script nor be
# assigned with "-v" on any current gawk.

BEGIN {
  usage = ( \
    "colorize-text-by-tuple \\\n" \
    " -v order=ORDER \\\n" \
    " [ -v filler=CHAR ] \\\n" \
    " [ -v lowercase=BOOL ] \\\n" \
    " -v table=TABLE \\\n" \
    " [ -v defdigs=DEFDIGS ] \\\n" \
    " [ -v dmax=DMAX ] \\\n" \
    " [ -v ymin=YMIN ] \\\n" \
    " [ -v ymax=YMAX ] \\\n" \
    " < SIGFILE > COLFILE" \
  );
  #
  # Colorizes letters in a text based on context.
  #
  # The file TABLE must have entries DIGS WORD where DIGS is a string
  # of digits and WORD a string of letters, both with length = ORDER.
  #
  # The input SIGFILE must have been produced by extract-signif-chars.
  # Let "txt[0...N-1]" be the sequence of significant
  # characters from SIGFILE (class 3).  The program
  # will compute from them a string of numbers "val[0..N-1]",
  # as follows: starting with all zero "val"s, whenever
  # some substring "txt[i..i+ORDER-1]" matches some WORD from the
  # TABLE file, this program will add the digits of the associated
  # DIGS to "val[i..i+ORDER-1]".  A substring that is not in TABLE
  # contributes the digits of DEFDIGS instead.
  #
  # The final value of "val[i]" is then used to colorize the character
  # "txt[i]" in the standard output (using HTML formatting).  Values in
  # the range 0 to DMAX (clipped) are mapped to the range of
  # intensities YMIN to YMAX (in [0 _ 1]) and pseudo-colored.
  #
  abort = -1;
  check_options();

  # Color table, indexed by quantized "val[i]":
  n_colors = 10;
  # RESTORED: loop header and body (only "for (i=0;i" survived).
  for (i = 0; i < n_colors; i++) {
    xcolor[i] = xcolor_from_val(i/(n_colors-1), ymin, ymax);
  }

  # RECONSTRUCTED: the original value of "ignored_color" is lost.  The
  # empty string makes print_text emit ignored text with no font tag.
  # (The otherwise unused xcolor_from_r_g_b_y() was presumably what the
  # original used here -- TODO confirm.)
  ignored_color = "";
  current_color = "";
  init_tup();
}

# RECONSTRUCTED: the pattern of this first rule is lost; "0" is assumed
# because the next surviving rules are for classes 1 and 2.
/^[0]/ {
  # Ignored text: becomes glue after the last character pushed.
  if (abort >= 0) { exit(abort); }
  push_deco(decode(substr($0,2)));
  next;
}

/^[1]/ {
  # One filler enters the tuple (shown as "_"); the record text is glue.
  if (abort >= 0) { exit(abort); }
  push_char(filler, "_");
  push_deco(decode(substr($0,2)));
  next;
}

/^[2]/ {
  if (abort >= 0) { exit(abort); }
  # RECONSTRUCTED: only "for (i=1;i" survived of this rule's body.  It
  # is assumed to push ORDER-1 invisible fillers (a break that isolates
  # the contexts on either side) and to keep the record text as glue.
  for (i = 1; i < order; i++) { push_char(filler, ""); }
  push_deco(decode(substr($0,2)));
  next;
}

# RECONSTRUCTED: no class-3 rule survived, but the header comment says
# class 3 holds the significant characters.  Each character of the
# record is pushed with its case-mapped form as the matching key.
/^[3]/ {
  if (abort >= 0) { exit(abort); }
  sig = substr($0,2);
  for (i = 1; i <= length(sig); i++) {
    c = substr(sig,i,1);
    push_char(((c in map) ? map[c] : c), c);
  }
  next;
}

# RECONSTRUCTED: fail loudly on any other record, so that a wrong guess
# about the class codes above cannot silently drop text.
{
  if (abort >= 0) { exit(abort); }
  error("line " NR ": unexpected record class");
}

# RESTORED: the "END {" header (only the tail of the abort test survived).
END {
  if (abort >= 0) { exit(abort); }
  flush_tup();
  # close last directive
  print_text("\n", "");
}

function init_tup(   i) {
  # RECONSTRUCTED (all but the first statement and the loop start):
  # preloads the buffer with ORDER-1 invisible fillers, so that every
  # push_tup call completes a tuple of exactly "order" characters.
  tup = "";
  for (i = 1; i < order; i++) {
    tup = (tup filler);
    ext[i] = ""; glu[i] = ""; val[i] = 0;
  }
  # NOTE(review): the original initial value of "wait" is lost; 0 means
  # that no tuple is skipped and no leading glue is dropped.
  wait = 0;
}

function flush_tup(   i) {
  # RECONSTRUCTED: pushes ORDER-1 invisible fillers so that every
  # character still in the buffer (and its glue) gets printed.
  for (i = 1; i < order; i++) { push_tup(filler, ""); }
}

function push_char(m, c) {
  # RECONSTRUCTED: thin wrapper; "m" is the character used for table
  # matching, "c" the text printed for it.  The argument roles are
  # inferred from the surviving call push_char(filler, "_").
  push_tup(m, c);
}

function decode(s) {
  # RECONSTRUCTED: the original definition is lost.  Identity is used.
  # TODO confirm which encoding extract-signif-chars applies to the
  # record text, and undo it here.
  return s;
}

function push_tup(m, c,   ds, d, i) {
  # Appends matching character "m" (printed as "c") to the tuple buffer,
  # scores the completed tuple, prints the oldest entry, shifts left.
  # RESTORED: the function header.  (A printf to "/dev/stderr" stood
  # here; its text is lost, and it is assumed to have been debugging
  # output.)
  if (m == "") { error("push_tup: empty m"); }
  # extend current tuple with new character:
  tup = (tup m);
  ext[order] = c; glu[order] = ""; val[order] = 0;
  if (wait == 0) {
    # Now "tup" must have "order" characters.
    # Find its digit string "ds" and add it to the "val" buffer:
    if (tup in dgs) { ds = dgs[tup]; } else { ds = defdigs; }
    for (i = 1; i <= order; i++) { d = substr(ds,i,1); val[i] += d; }
    # write out the first character of "tup":
    print_tup_head();
  } else {
    wait--;
  }
  # Shift buffer left by 1:
  pop_tup();
}

function push_deco(s) {
  # Attaches ignored text "s" to the last character pushed.  With
  # order 1 that character is already printed, so "s" goes straight out.
  if (order > 1) {
    glu[order-1] = (glu[order-1] s);
  } else {
    print_text(s, ignored_color);
  }
}

function print_tup_head(   v) {
  # Prints first character in "tup" buffer, and associated glue:
  v = int(val[1]/dmax*(n_colors-1));
  v = (v < 0 ? 0 : (v >= n_colors ? n_colors-1 : v));
  print_text(ext[1], xcolor[v]);
  print_text(glu[1], ignored_color);
}

function pop_tup(   i) {
  # shift out first position of "tup" buffer and accessories:
  tup = substr(tup,2,order-1);
  # RESTORED: loop header and body (only "for (i=1; i" survived).
  for (i = 1; i < order; i++) {
    ext[i] = ext[i+1]; glu[i] = glu[i+1]; val[i] = val[i+1];
  }
}

function check_options(   i, c, uc, lc, ucs, lcs, lin, fld, nf, nlin, res, d, w) {
  # Validates the "-v" options, fills in defaults, builds the case map
  # "map" and loads TABLE into "dgs" (WORD -> DIGS).

  # RESTORED: the start of this function; the wording of the first
  # message is modelled on the surviving "table" message.
  if (order == "") { error("should define \"order\""); }
  if ((order+0 < 1) || (order+0 > 20)) { error("funny \"order\""); }

  if (filler == "") { filler = "_"; }
  if (length(filler) != 1) { error(("the \"filler\" should be a single char")); }

  # --- lowercase mapping ----------------------------------------------
  split("", map);
  for (i = 0; i < 256; i++) { c = sprintf("%c", i); map[c] = c; }
  if (lowercase == "") { lowercase = 1; }
  if (lowercase > 0) {
    ucs = "ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ";
    lcs = "abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ";
    for (i = 1; i <= length(ucs); i++) {
      uc = substr(ucs,i,1); lc = substr(lcs,i,1);
      map[uc] = lc;
    }
  }

  # --- tuple table ----------------------------------------------------
  if (table == "") { error("should define \"table\""); }
  nlin = 0;
  # getline returns -1 on a read/open failure; testing "> 0" keeps an
  # unreadable TABLE from looping forever.
  while ((res = (getline lin < table)) > 0) {
    # "nlin" counts TABLE lines; NR is still 0 while BEGIN is running.
    nlin++;
    nf = split(lin, fld);
    if (nf != 2) { error("file " table " line " nlin ": bad format"); }
    d = fld[1]; w = fld[2];
    if (length(w) != order) { error("file " table " line " nlin ": wrong word length"); }
    if (length(d) != order) { error("file " table " line " nlin ": wrong digs length"); }
    if (d !~ /^[0-9]+$/) { error("file " table " line " nlin ": bad digs"); }
    dgs[w] = d;
  }
  if (res < 0) { error("file " table ": cannot read"); }
  close(table);

  # --- numeric options ------------------------------------------------
  if (defdigs == "") {
    for (i = 1; i <= order; i++) { defdigs = (defdigs "9"); }
  }
  if (dmax == "") { dmax = 9; }
  if (ymin == "") { ymin = 0.40; }
  if (ymax == "") { ymax = 1.00; }
  # "val/dmax" and "log(ymax/ymin)" are fatal for out-of-range values:
  if (dmax+0 <= 0) { error("\"dmax\" should be positive"); }
  if ((ymin+0 <= 0) || (ymin+0 > 1)) { error("\"ymin\" should be in (0 _ 1]"); }
  if ((ymax+0 <= 0) || (ymax+0 > 1)) { error("\"ymax\" should be in (0 _ 1]"); }
}

function print_text(str, color) {
  # Writes "str" to stdout with HTML special characters escaped,
  # switching the enclosing font tag when "color" differs from the
  # color currently open ("" = no font tag).  Empty "str" is a no-op.
  if (str != "") {
    if (current_color != color) {
      # RESTORED: both tags were stripped from the damaged copy; the
      # exact quoting of the attribute is assumed.
      if (current_color != "") { printf "</font>"; }
      if (color != "") { printf "<font color=\"#%s\">", color; }
      current_color = color;
    }
    # "\\&" is a literal "&" in a gsub replacement; "&" must go first.
    gsub(/[&]/, "\\&amp;", str);
    gsub(/[<]/, "\\&lt;", str);
    gsub(/[>]/, "\\&gt;", str);
    printf "%s", str;
  }
}

function error(msg) {
  # Reports "msg" on stderr and stops.  "exit" still runs the END rule,
  # which sees "abort" set and exits with that status.
  printf "%s\n", msg >> "/dev/stderr";
  abort = 1;
  exit 0;
}

function abs(x) {
  return (x >= 0 ? x : -x)
}

function clip01(x) {
  # Clamps "x" to the interval [0 _ 1].
  return (x < 0 ? 0 : (x > 1 ? 1 : x))
}

function rgb_from_hue(rgb, h,   hf, hi) {
  # Stores in "rgb[0..2]" the saturated color of hue "h" (period 1):
  # six linear segments, 0 = red, 1/3 = green, 2/3 = blue.
  while (h >= 1) { h = h - 1; }
  while (h < 0) { h = h + 1; }
  h = 6*h;
  hi = int(h); hf = h - hi;
  if (hi == 0) {
    rgb[0] = 1; rgb[1] = hf; rgb[2] = 0;
  } else if (hi == 1) {
    rgb[0] = 1-hf; rgb[1] = 1; rgb[2] = 0;
  } else if (hi == 2) {
    rgb[0] = 0; rgb[1] = 1; rgb[2] = hf;
  } else if (hi == 3) {
    rgb[0] = 0; rgb[1] = 1-hf; rgb[2] = 1;
  } else if (hi == 4) {
    rgb[0] = hf; rgb[1] = 0; rgb[2] = 1;
  } else if (hi == 5) {
    rgb[0] = 1; rgb[1] = 0; rgb[2] = 1-hf;
  }
}

function y_from_rgb(rgb) {
  # Intensity of "rgb" as a weighted sum of its components.
  return 0.30*rgb[0] + 0.60*rgb[1] + 0.10*rgb[2];
}

function rgb_fix_y(rgb, y,   yy, ar, aw) {
  # mixes white or black into "rgb" so that its intensity is "y".
  yy = y_from_rgb(rgb);
  if (yy < y) {
    # mix white
    ar = (1-y)/(1-yy); aw = (y-yy)/(1-yy);
    rgb[0] = ar*rgb[0] + aw;
    rgb[1] = ar*rgb[1] + aw;
    rgb[2] = ar*rgb[2] + aw;
  } else if (yy > y) {
    # mix black: a pure scaling (the original added an unset "aw" here)
    ar = y/yy;
    rgb[0] = ar*rgb[0];
    rgb[1] = ar*rgb[1];
    rgb[2] = ar*rgb[2];
  }
}

function gamma(r) {
  return sqrt(r)
}

function xcolor_from_rgb(rgb,   rr, gg, bb) {
  # Returns "rgb" as a 6-digit hex string "rrggbb" after gamma
  # correction.  Components are clamped first, so that rounding
  # residue below 0 can never reach sqrt().
  rr = int(gamma(clip01(rgb[0]))*255 + 0.5);
  gg = int(gamma(clip01(rgb[1]))*255 + 0.5);
  bb = int(gamma(clip01(rgb[2]))*255 + 0.5);
  return sprintf("%02x%02x%02x", rr, gg, bb);
}

function xcolor_from_val(v,ymin,ymax,   y,rgb) {
  # Assumes "v" is in [0 _ 1].
  # The hue runs once around the circle starting at 0.6667 (blue); the
  # intensity is interpolated geometrically from "ymin" to "ymax".
  v = (v < 0 ? 0 : (v > 1 ? 1 : v));
  rgb_from_hue(rgb, 0.6667 - v);
  y = ymin*exp((v)*log(ymax/ymin));
  rgb_fix_y(rgb, y);
  # printf "v = %7.3f y = %6.4f rgb = (%6.4f,%6.4f,%6.4f)\n", \
  #   v, y, rgb[0], rgb[1], rgb[2] > "/dev/stderr";
  return(xcolor_from_rgb(rgb));
}

function xcolor_from_r_g_b_y(r,g,b,y,   rgb) {
  # Hex color of (r,g,b) after its intensity is adjusted to "y".
  split("", rgb);
  rgb[0] = r; rgb[1] = g; rgb[2] = b;
  rgb_fix_y(rgb, y);
  return(xcolor_from_rgb(rgb));
}