#! /usr/bin/gawk -f
# Last edited on 2002-03-05 03:30:53 by stolfi

BEGIN{
  abort = -1;

  # Usage summary; kept for documentation, it is not referenced
  # anywhere else in the visible source.
  usage = ( \
    "cat INFILE \\\n" \
    " | format-soc \\\n" \
    " -v title=STRING \\\n" \
    " -v showWords=BOOL \\\n" \
    " -v showWeak=BOOL \\\n" \
    " > OUTFILE " \
  );

  # Reads a file containing lines of the form
  #
  #   SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD HEAD TAG
  #   1   2    3    4    5    6    7    8    9     10    11   12   13
  #
  # Assumes that the file is sorted by HEAD, then some fields,
  # then USEQ (FNUM and UNIT), NLIN, TRAN.  Prints all entries
  # with the same HEAD, in compact format, suppressing repeated fields.
  # If `showWords' is TRUE, prints words too, else locations only.
  # If `showWeak' is FALSE, omits weak matches (those with TAG = 0).

  if (title == "") { title = "Occurrences of selected words"; }
  if (showWords == "") { showWords = 1; }
  if (showWeak == "") { showWeak = 1; }

  # Give `ohead' an explicit string value.  An uninitialized variable
  # compares *numerically* against a numeric-looking field, so a first
  # HEAD such as "0" would otherwise look equal to `ohead' and never
  # get its banner printed.
  ohead = "";

  output_html_header(title);

  out_line_indent = 2;
  out_line_width = 56; # Not counting indentation
}

# Stop processing input as soon as an error has been flagged.
(abort >= 0) {exit abort;}

# Main rule: one occurrence record per 13-field line.
(NF == 13){
  sec = $1; useq = $2; fnum = $3; unit = $4; nlin = $5; tran = $6;
  fpos = $7; rpos = $8; pfrst = $9; plast = $10;
  word = $11; head = $12; tag = $13;

  # Weak matches are dropped unless the user asked for them.
  if ((tag == 0) && (! showWeak)) { next; }

  # A new HEAD closes the previous group, prints a banner, and
  # forgets every "previous field" so that the first entry of the
  # group is printed in full.
  if (head != ohead)
    { if (ohead != "") { finish_head(); }
      start_head(head);
      ohead = head;
      osec = ""; otag = ""; oword = "";
      ofnum = ""; ounit = ""; onlin = ""; otran = "";
    }

  # `otran' is non-empty iff an entry was already printed in this group.
  if (otran != "") { print_comma(); }

  # Switch the enclosing font color whenever TAG changes.
  if (tag != otag)
    { if (otag != "") { close_font(); }
      open_font(tag_color(tag));
      otag = tag;
    }

  if (showWords)
    { if (word != oword) { print_word(word); oword = word; } }

  # Print only the location fields that differ from the previous
  # entry.  A change at one level invalidates all finer levels, which
  # forces them to be printed again (with their separators).
  if (sec != osec)
    { print_sec(sec); osec = sec;
      ofnum = ""; ounit = ""; onlin = ""; otran = "";
    }
  if (fnum != ofnum)
    { print_fnum(fnum); ofnum = fnum;
      ounit = ""; onlin = ""; otran = "";
    }
  if (unit != ounit)
    { print_unit(unit); ounit = unit;
      onlin = ""; otran = "";
    }
  if (nlin != onlin)
    { print_nlin(nlin); onlin = nlin;
      otran = "";
    }
  print_tran(tran); otran = tran;
  next;
}

END {
  if (abort >= 0) {exit abort;}
  if (ohead != "") { finish_head(); }
  output_html_trailer();
}

function output_html_header(title)
{
  # Writes the HTML preamble: document head with `title', start of the
  # body, a heading repeating `title', and the opening of the
  # preformatted region that holds the listing.
  #
  # NOTE(review): the tags in these strings were missing from the copy
  # of the script under review (only the text between tags survived);
  # they were reconstructed from the surviving newline pattern.  Confirm
  # the exact tags and any <body> attributes against a pristine copy.
  printf "<html>\n<head>\n<title>%s</title>\n</head>\n", title;
  printf "<body>\n\n";
  printf "<h1>%s</h1>\n\n<pre>\n", title;
}

function start_head(head,   rule,k)
{
  # Prints the banner that introduces the entries of `head':
  # "=== HEAD " followed by enough "=" signs to reach 60 columns
  # (none if HEAD is too long for that), then begins a fresh
  # indented output line.
  rule = "";
  for (k = 60 - 5 - length(head); k > 0; k--) { rule = rule "="; }
  printf "=== %s %s\n", head, rule;
  newline();
}

function tag_color(tag)
{
  # Maps a match TAG (0, 1 or 2) to a six-digit hex color.
  # Any other tag falls off the end and yields the empty string.
  if (tag == 0) { return "00aa00"; }
  if (tag == 1) { return "ff7700"; }
  if (tag == 2) { return "ffbb00"; }
}

function open_font(clr)
{
  # Opens an HTML font span that colors the text that follows.
  # `clr' is a six-digit hex color without the leading "#".
  #
  # The tag is written with a bare printf rather than print_string, so
  # it does not count toward `cur_line_width' (markup takes no room on
  # the rendered line).
  #
  # The format string used to be empty, which silently discarded `clr'
  # and emitted nothing.
  # NOTE(review): the tag text is a reconstruction -- confirm the exact
  # attribute syntax against a pristine copy of the script.
  printf "<font color=\"#%s\">", clr;
}

function print_sec(sec,   clr)
{
  # Emits the SEC field followed by a blank, wrapped in a font span
  # whose color is chosen by sec_color.
  clr = sec_color(sec);
  open_font(clr);
  print_string(sec " ");
  close_font();
}

function sec_color(sec,   palette)
{
  # Returns the six-digit hex color associated with section code `sec'.
  # Codes not listed below get the fallback color "88ccff".
  palette["her"] = "00cc00";
  palette["str"] = "ff00ff";
  palette["ast"] = "00ddff";
  palette["bio"] = "ffaa66";
  palette["unk"] = "aaaaaa";
  palette["zod"] = "ffff00";
  return ((sec in palette) ? palette[sec] : "88ccff");
}

function print_fnum(fnum)
{
  # Emits the FNUM field unchanged.  It goes through print_string so
  # that it takes part in the line-width accounting and wrapping.
  print_string(fnum);
}

function print_unit(unit,   txt)
{
  # Emits the UNIT field.  When the global `ounit' is empty (no unit
  # remembered from the previous entry) the field is prefixed with the
  # "." separator; otherwise it is printed bare.
  txt = (ounit == "") ? ("." unit) : unit;
  print_string(txt);
}

function print_nlin(nlin,   txt)
{
  # Emits the LINE field.  When the global `onlin' is empty (no line
  # remembered from the previous entry) the field is prefixed with the
  # "." separator; otherwise it is printed bare.
  txt = (onlin == "") ? ("." nlin) : nlin;
  print_string(txt);
}

function print_tran(tran,   txt)
{
  # Emits the TRAN field.  When the global `otran' is empty (no TRAN
  # remembered from the previous entry) the field is prefixed with the
  # ":" separator; otherwise it is printed bare.
  txt = (otran == "") ? (":" tran) : tran;
  print_string(txt);
}

function print_word(word,   txt)
{
  # Emits `word' enclosed in parentheses.
  txt = "(" word ")";
  print_string(txt);
}

function close_font()
{
  # Closes the innermost HTML font span.
  #
  # Written with a bare printf rather than print_string, so the tag
  # does not count toward `cur_line_width'.
  #
  # The format string used to be empty, so an opened font span was
  # never closed.
  # NOTE(review): the tag text is a reconstruction -- confirm against a
  # pristine copy of the script.
  printf "</font>";
}

function print_comma()
{
  # Emits the "," that separates two entries.  If there is still room
  # for at least one more column on the current line, a blank follows;
  # otherwise the output wraps to a new indented line.
  printf ",";
  cur_line_width += 1;
  if (cur_line_width + 1 <= out_line_width)
    { printf " ";
      cur_line_width += 1;
    }
  else
    { newline(); }
}

function print_string(str,   len,overflow)
{
  # Emits `str' and adds its length to `cur_line_width'.  If the
  # string would push a non-empty line past `out_line_width', the
  # output wraps first.  A string that is too wide on its own is still
  # printed, on a line by itself.
  len = length(str);
  overflow = (cur_line_width + len > out_line_width);
  if (overflow && (cur_line_width > 0)) { newline(); }
  printf "%s", str;
  cur_line_width = cur_line_width + len;
}

function newline()
{
  # Ends the current output line, indents the next one by
  # `out_line_indent' blanks, and resets the column counter (which
  # does not include the indentation).
  cur_line_width = 0;
  printf "\n%*s", out_line_indent, "";
}

function finish_head()
{
  # Ends the group of entries for the current HEAD: closes the font
  # span if one was opened (global `otag' non-empty), then terminates
  # the line and leaves one blank line.
  if (length(otag) > 0) { close_font(); }
  printf "\n\n";
}

function output_html_trailer(title)
{
  # Writes the end of the HTML document.  The `title' parameter is
  # unused; it is kept so that existing calls remain valid.
  #
  # NOTE(review): the closing tags were missing from the copy of the
  # script under review (only the "\n" escapes survived, split across
  # a raw line break that awk rejects as an unterminated string).  They
  # were reconstructed from that pattern -- confirm against a pristine
  # copy.
  printf "</pre>\n</body>\n</html>\n";
}

# Catch-all rule: any non-empty line that was not consumed by an
# earlier rule (which ends with `next') is malformed.
/./{
  data_error("bad line type");
}

function data_error(msg)
{
  # Reports `msg' on standard error, tagged with the current line
  # number of the input file, then sets the global `abort' to 1 and
  # exits with that status.
  printf "*** line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}