#! /usr/bin/gawk -f
# Last edited on 2003-07-21 22:22:01 by stolfi

# Reads an html page CTX that is obtained by clicking a "context"
# button in a NEC "Citation Search" report page.
# Outputs a list of links to the papers listed in the
# CTX page.
#
# Output (stdout): one line per harvested link, "URL TITLE", where TITLE
# is the text that follows the opening <a ...> tag on the same line, with
# blanks replaced by "_".  TITLE is empty when no such text is present,
# in which case the line is "URL " exactly as before.
# Progress marks ("@", "*", "!") and diagnostics go to /dev/stderr.
# Requires gawk: the three-argument form of match() is a gawk extension.

BEGIN {
  grab = 0;        # 1 while the next line starting with "<a" must be harvested
  split("", fld);  # make fld an empty array for match() to fill
  printf "@\n" > "/dev/stderr";
}

# Marker line: arms the harvesting of the next anchor line.
# NOTE(review): in the copy of this script under review, the character
# between "/" and " *$/" was a raw line break, which awk cannot accept inside
# a regex literal.  It is assumed here to have been a literal carriage
# return, i.e. the marker is a line ending in CR plus optional blanks.
# TODO: confirm against the original file and a real CTX page.
/\r *$/ {
  printf "*" > "/dev/stderr";
  grab = 1;
  next;
}

# Anchor line: when armed, extract the href target and the anchor text.
/^[<][a]/ {
  if (grab) {
    lin = $0;
    printf "!\n" > "/dev/stderr";
    # Group 1 is the quoted href value; group 2 is whatever text follows the
    # opening tag up to the next "<" (or CR / end of line).  Group 2 may be
    # empty, so this pattern matches exactly the lines the one-group pattern
    # matched.
    if (match(lin, /[<] *a +href *[=] *[\"]([^\"<>]*)[\"] *[>]([^<\r]*)/, fld)) {
      url = fld[1];
      tit = fld[2];
      # Keep the title a single whitespace-delimited field.
      gsub(/[ ]/, "_", tit);
      printf "%s %s\n", url, tit;
    } else {
      data_error("no url?");
    }
    grab = 0;  # one anchor per marker
  }
  next;
}

# Every other line is ignored.
// { next; }

# Reports a non-fatal problem with the current input record on stderr,
# prefixed with the input file name and record number.
function data_warning(msg) {
  printf "%s:%d: ++ Warning: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf " $0 = \"%s\"\n", $0 > "/dev/stderr";
}

# Reports a fatal problem with the current input record on stderr and
# terminates the program with exit status -1.
function data_error(msg) {
  printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf " $0 = \"%s\"\n", $0 > "/dev/stderr";
  abort = -1;
  exit abort;
}