#! /usr/bin/gawk -f # Last edited on 1999-07-28 01:55:51 by stolfi # Colorizes the output of "extract-signif-chars" by a tuple-indexed table. BEGIN { usage = ( \ "show-local-entropy \\\n" \ " -v order=ORDER \\\n" \ " -v place=PLACE \\\n" \ " -v table=TABLE \\\n" \ " [ -v filler=CHAR ] \\\n" \ " [ -v lowercase=BOOL ] \\\n" \ " -v hmin=HMIN \\\n" \ " -v hmax=HMAX \\\n" \ " [ -v ymin=YMIN ] \\\n" \ " [ -v ymax=YMAX ] \\\n" \ " [ -v colors=NCOLORS ] \\\n" \ " < SIGPAGE > COLPAGE" \ ); # # Colorizes letters in a text based on the local conditional entropy. # # The file TABLE must have entries VALUE WORD where VALUE is a real # value, and WORD a string of letters with length = ORDER. # # The input file SIGPAGE must have been produced by # extract-signif-chars. # Only the "significant" letters (class 3) and breaks in SIGPAGE are # considered. A word break (class 1) # is replaced by the FILLER character, and a paragraph break (class # 2) is replaced by ORDER-1 FILLERs. If "lowercase" # is set, class 3 characters are downcased before comparisons # and tabulations. # # Let "txt[0...N-1]" be the sequence of significant characters # (class 3) from SIGPAGE, plus the FILLERs resulting from break # expansion. The program will compute from them a string of numbers # "val[0..N-1]", as follows: starting with all zero "val"s, whenever # some substring "txt[i..i+ORDER-1]" matches some WORD from the # TABLE file, this program will set "val[i+PLACE-1]" to the # corresponding VALUE. # # The final value of "val[i]" is then used to colorize the character # "txt[i]" in the standard output (using HTML formatting). Values in # the range HMIN to HMAX (clipped) are mapped to NCOLORS discrete # colors with varying hues and increasing intensities in the range # YMIN to YMAX (in [0 _ 1]) (plus two colors for out-of-range # values). # # Non-significant characters (class 0) are printed in the # appropriate places, with the default color, but are skipped over # when forming tuples. abort = -1; check_options(); read_value_table(table, order); # Color table: make_color_table(colors, hmin, hmax, ymin, ymax); current_color = ""; init_tup(); } /^[0]/{ if (abort >= 0) { exit(abort); } push_deco(decode(substr($0,2))); next; } /^[1]/{ if (abort >= 0) { exit(abort); } push_char(filler, "_"); push_deco(decode(substr($0,2))); next; } /^[2]/{ if (abort >= 0) { exit(abort); } for (i=1;i= 0) { exit(abort); } flush_tup(); # close last directive print_text("\n", ""); print_color_table(colors, xcolor, tcolor); } function init_tup( i) { tup = ""; for (i=1;i "/dev/stderr"; if (m == "") { error("push_tup: empty m"); } # extend current tuple with new character: tup = (tup m); ext[order] = c; glu[order] = ""; val[order] = 0; if (wait == 0) { # Now "tup" must have "order" characters. # Find its value "v" and insert it in the "val" buffer: if (tup in vtable) { v = vtable[tup]; } else { v = hmax; } val[place] = v; # write out the first character of "tup": print_tup_head(); } else { wait--; } # Shift buffer left by 1: pop_tup(); } function push_deco(s) { if (order > 1) { glu[order-1] = (glu[order-1] s); } else { print_text(s, ""); } } function print_tup_head( v) { # Prints fist character in "tup" buffer, and associated glue: print_text(ext[1], xcolor_from_value(val[1], colors, hmin, hmax)); print_text(glu[1], ""); } function pop_tup( i) { # shift out first position of "tup" buffer and acessories: tup = substr(tup,2,order-1); for (i=1; i"; } if (color != "") { printf "", color; } current_color = color; } gsub(/[&]/, "\\&", str); gsub(/[<]/, "\\<", str); gsub(/[>]/, "\\>", str); printf "%s", str; } } function check_options( i,c,mk,ucs,lcs,uc,lc) { # Analyzes/defaults the option variables, namely # # "order" "place" # "table" # "filler" "lowercase" # "hmin" "hmax" # "ymin" "ymax" "colors" # # Defines the global variable "map" that maps characters to lowercase # if so desired. if (order == "") { error("should define \"order\""); } if ((order < 1) || (order > 20)) { error("funny \"order\""); } if (place == "") { error("should define \"place\""); } if ((place < 1) || (place > order)) { error("funny \"place\""); } if (filler == "") { filler = "_"; } if (length(filler) != 1) { error(("the \"filler\" should be a single char")); } # --- lowercase mapping ---------------------------------------------- split("", map); for (i=0;i<256;i++) { c = sprintf("%c", i); map[c] = c; } if (lowercase == "") { lowercase = 1; } if (lowercase > 0) { ucs = "ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ"; lcs = "abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ"; for (i=1;i<=length(ucs);i++) { uc = substr(ucs,i,1); lc = substr(lcs,i,1); map[uc] = lc; } } if (table == "") { error("should define \"table\""); } if (hmin == "") { error("should define \"hmin\""); } if (hmax == "") { error("should define \"hmax\""); } if (hmin >= hmax) { error("funny \"hmin\",\"hmax\""); } if (ymin == "") { ymin = 0.25; } if (ymax == "") { ymax = 1.00; } if (colors == "") { colors = 9; } } function read_value_table(table, order, lin,fld,v,w) { # Reads the table "vtable" that maps tuples to values. while ( getline lin < table ) { split(lin, fld); if ((3 in fld) || !(2 in fld)) { error("file " table " line " NR ": bad format"); } v = fld[1]; w = fld[2]; if (length(w) != order) { error("file " table " line " NR ": wrong word length"); } if (v !~ /^[-+]?[0-9]+[.]?[0-9]*$/) { error("file " table " line " NR ": bad value"); } vtable[w] = v; } } function error(msg) { printf "%s\n", msg >> "/dev/stderr"; abort = 1; exit 0; } function xcolor_from_value(v, colors, hmin, hmax, i) { # Maps a letter value "v" in "[hmin _ hmax]" # to an X color value: if (v < hmin) { return xcolor[-1]; } else if (v >= hmax) { return xcolor[colors]; } else { i = int(colors*(v - hmin)/(hmax - hmin)); return xcolor[(i < 0 ? 0 : (i >= colors ? colors-1 : i))]; } } function make_color_table(colors, hmin, hmax, ymin, ymax, i,vlo,vhi) { # Defines and prints the color table "xcolor[i]" used by # "xcolor_from_value". Also sets "tcolor[i] to the # correspoding legend. xcolor[-1] = xcolor_from_ratio(0, ymin, ymax); tcolor[-1] = sprintf(" 0.000 to %6.3f bits", hmin-0.001); for (i=0;i= -1; i--) { printf "# %s\n", xcolor[i], tcolor[i]; } printf "\n"; } function xcolor_from_ratio(r, ymin, ymax, y,rgb) { # Maps the parameter "r" from "[0_1]" to # a color with intensity in "[ymin _ ymax]". r = (r < 0 ? 0 : (r > 1 ? 1 : r)); rgb_from_hue(rgb, 0.6667 - 1.25*r); # rgb_from_hue(rgb, r); y = ymin*exp((r)*log(ymax/ymin)); rgb_fix_y(rgb, y); # printf "r = %7.3f y = %7.5f rgb = (%7.5f,%7.5f,%7.5f)\n", \ # r, y, rgb[0], rgb[1], rgb[2] > "/dev/stderr"; return(xcolor_from_rgb(rgb)); } function abs(x) { return (x >= 0 ? x : -x) } function rgb_from_hue(rgb, h, hf, hi) { while (h >= 1) { h = h - 1; } while (h < 0) { h = h + 1; } h = 6*h; hi = int(h); hf = h - hi; if (hi == 0) { rgb[0] = 1; rgb[1] = hf; rgb[2] = 0; } else if (hi == 1) { rgb[0] = 1-hf; rgb[1] = 1; rgb[2] = 0; } else if (hi == 2) { rgb[0] = 0; rgb[1] = 1; rgb[2] = hf; } else if (hi == 3) { rgb[0] = 0; rgb[1] = 1-hf; rgb[2] = 1; } else if (hi == 4) { rgb[0] = hf; rgb[1] = 0; rgb[2] = 1; } else if (hi == 5) { rgb[0] = 1; rgb[1] = 0; rgb[2] = 1-hf; } } function y_from_rgb(rgb) { return 0.30*rgb[0] + 0.60*rgb[1] + 0.10*rgb[2]; } function rgb_fix_y(rgb, y, yy, ar, aw, ab) { # mixes white or black into "rgb" so that its intensity is "y". yy = y_from_rgb(rgb); if (yy < y) { # mix white ar = (1-y)/(1-yy); aw = (y-yy)/(1-yy); rgb[0] = ar*rgb[0] + aw; rgb[1] = ar*rgb[1] + aw; rgb[2] = ar*rgb[2] + aw; } else if (yy > y) { # mix black ar = y/yy; rgb[0] = ar*rgb[0] + aw; rgb[1] = ar*rgb[1] + aw; rgb[2] = ar*rgb[2] + aw; } } function gamma(r) { return (r <= 0 ? r : exp(log(r)/2.0)) } function xcolor_from_rgb(rgb, rr, gg, bb) { rr = int(gamma(rgb[0])*255 + 0.5); gg = int(gamma(rgb[1])*255 + 0.5); bb = int(gamma(rgb[2])*255 + 0.5); return sprintf("%02x%02x%02x", rr, gg, bb); } function xcolor_from_r_g_b_y(r,g,b,y, rgb) { split("", rgb); rgb[0] = r; rgb[1] = g; rgb[2] = b; rgb_fix_y(rgb, y); return(xcolor_from_rgb(rgb)); }