#! /usr/bin/gawk -f
# Last edited on 2003-07-21 22:22:01 by stolfi
# Reads an html page CTX that is obtained by clicking a "context"
# button in a NEC "Citation Search" report page.
# Outputs a list of links to the papers listed in the
# CTX page.
# Initialization: no link is armed yet, `fld` starts out as an empty array,
# and a "@" line on stderr marks the start of the run.
BEGIN {
  grab = 0;
  split("", fld);
  printf("@\n") > "/dev/stderr";
}
# Marker rule: a line matching this pattern sets `grab`, so that the next
# line starting with "<a" is processed as a paper link; a "*" is written
# to stderr as a progress mark and the line is otherwise skipped.
# NOTE(review): the regex literal below is split across two physical
# lines, and an awk regex constant cannot contain a raw newline.  The
# character(s) originally between "/" and "*$/" appear to have been lost
# (possibly a carriage return, or an HTML tag) -- TODO recover the
# intended pattern from the original file before running this script.
/
*$/ {
printf "*" > "/dev/stderr";
grab = 1; next;
}
# Link rule: handles every line that starts with "<a".  If the marker rule
# armed `grab`, extract the href URL and the anchor text from the line and
# print them to stdout as "URL TITLE", with blanks in the title replaced
# by "_".  If the armed line has no recognizable href, abort through
# data_error.  Lines starting with "<a" are always consumed here, whether
# or not `grab` was set.
/^[<][a]/ {
  if (grab)
    { lin = $0;
      printf "!\n" > "/dev/stderr";
      # Group 1 is the quoted href value; group 2 is the link text, i.e.
      # everything after the opening tag up to the next "<" (or end of line).
      if (match(lin, /[<] *a +href *[=] *[\"]([^\"<>]*)[\"] *[>]([^<]*)/, fld))
        { url = fld[1];
          tit = fld[2];
          gsub(/[ ]/, "_", tit);
          # One conversion per argument, so the title is actually printed.
          printf "%s %s\n", url, tit;
        }
      else
        { data_error("no url?"); }
      # Disarm: only the first "<a" line after a marker is reported.
      grab = 0;
    }
  next;
}
# Catch-all: any record not handled by an earlier rule is skipped.
{ next }
# Reports a non-fatal problem on stderr: a "file:line: ++ Warning: msg"
# line followed by the text of the current record.  Processing continues.
function data_warning(msg)
{
  printf("%s:%d: ++ Warning: %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  printf(" $0 = \"%s\"\n", $0) > "/dev/stderr";
}
# Reports a fatal problem on stderr: a "file:line: ** msg" line followed
# by the text of the current record.  Then records the failure in the
# global `abort` and terminates the script with that value as exit code.
function data_error(msg)
{
  printf("%s:%d: ** %s\n", FILENAME, FNR, msg) > "/dev/stderr";
  printf(" $0 = \"%s\"\n", $0) > "/dev/stderr";
  abort = -1;
  exit abort;
}