#! /bin/gawk -f
# Last edited on 2002-01-04 13:58:54 by stolfi

# Reads a list of words from WORDFILE (-v words=WORDFILE), then scans
# standard input and prints each data line that contains a listed word
# whose remaining quota is still positive, prefixed by the name of the
# current unit (taken from the most recent "@unit" line).
#
# Each word starts with a quota of TIMES (-v times=NUM, default 1).
# A selected line consumes one unit of quota from the first listed
# word found on it that still has quota left.

BEGIN {
  abort = -1;
  usage = ( \
    "cat main.org \\\n" \
    " | find-words-in-main-org \\\n" \
    " -v words=WORDFILE [ -v times=NUM ] \\\n" \
    " > occurrences.txt" \
  );

  if (words == "") { arg_error(("must define \"words\"")); }
  if (times == "") { times = 1; }
  load_special_words(words, times);

  # Unit name reported for lines that precede any "@unit" line.
  curunit = "NIL:0:0";
  # Accumulated (regex-quoted) punctuation characters; empty = none declared.
  PUNCT = "";
}

# "@unit" line: remember the unit name (fourth field) for later output.
/^[ ]*[@]unit/ {
  curunit = $4;
}

# '# PUNCT = "..."' directive: append the quoted characters to the
# punctuation set and rebuild the bracket expression used to blank them.
/^ *[#] *PUNCT *[=] *["].*["] *$/ {
  PUNCT = (PUNCT get_val($0));
  if (PUNCT != "") { punct_pat = ( "[" PUNCT "]" ); }
}

# Comment lines, "@" directives and blank lines carry no words.
/^[ ]*([#@]|$)/ { next; }

# Data line: blank out punctuation, then look for a listed word.
{
  lin = $0;
  # Assigning to $0 re-splits the fields, so words glued to punctuation
  # become separate fields.
  if (PUNCT != "") { gsub(punct_pat, " ", $0); }
  sel = 0;
  for (i = 1; ((i <= NF) && (! sel)); i++) {
    w = $(i);
    if ((w in maxocs) && (maxocs[w] > 0)) { maxocs[w]--; sel = 1; }
  }
  if (sel) {
    # Print the original line (punctuation intact), minus leading blanks.
    gsub(/^ +/, "", lin);
    printf "%-8s %s\n", curunit, lin;
  }
  next;
}

function get_val(def)
{
  # Extracts the text between the double quotes of a line of the form
  #   # NAME = "VALUE"
  # and returns it quoted for use inside a regex bracket expression.
  # Leading blanks before "#" are accepted, matching the rule that
  # recognizes the directive.
  sub(/^ *[#] *[A-Z]+ *= *"/, "", def);
  sub(/" *$/, "", def);
  return(quote_special(def));
}

function quote_special(chars)
{
  # Backslash-escapes the characters that are special inside a bracket
  # expression: backslash, "-", "]" and "^".  The backslash must be
  # handled first so that the escapes added afterwards are not doubled.
  gsub(/[\\]/, "\\\\", chars);
  gsub(/[-]/, "\\-", chars);
  gsub(/[\]]/, "\\]", chars);
  gsub(/[\^]/, "\\^", chars);
  return chars;
}

function load_special_words(file,moc,    nWords,lin,fld,nfld,status,lnum)
{
  # Reads a word list from "file".
  # For each word W, sets "maxocs[W] = moc".
  #
  # Lines beginning with "#" (after optional blanks) are skipped and
  # trailing "#" comments are removed.  Every remaining line must hold
  # exactly one word, and no word may be repeated; otherwise the
  # program aborts through "tbl_error".  A read failure aborts through
  # "arg_error".
  nWords = 0;
  lnum = 0;
  split("", maxocs);
  # Keep the getline status so that a read failure (negative) can be
  # told apart from a normal end of file (zero).
  while ((status = (getline lin < file)) > 0) {
    # "getline var < file" does not update FNR, so count lines here.
    lnum++;
    gsub(/^[ ]*/, "", lin);
    if (! match(lin, /^[#]/)) {
      gsub(/[ ]*[#].*$/, "", lin);
      nfld = split(lin, fld, " ");
      if (nfld != 1) tbl_error(file, ("bad table entry = \"" lin "\""), lnum);
      if (fld[1] in maxocs) tbl_error(file, ("repeated key = \"" lin "\""), lnum);
      maxocs[fld[1]] = moc;
      nWords++;
    }
  }
  # ERRNO is only meaningful after a failed operation, so test the
  # status rather than the contents of ERRNO.
  if (status < 0) { arg_error((file ": " ERRNO)); }
  close (file);
  if (nWords == 0) {
    # Pass the name as an argument so a "%" in it cannot act as a format.
    printf "warning: file \"%s\" empty or missing\n", file > "/dev/stderr";
  } else {
    printf "loaded %6d words\n", nWords > "/dev/stderr";
  }
}

function arg_error(msg)
{
  # Reports a command-line or setup error, prints the usage text,
  # and terminates with exit status 1.
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}

function data_error(msg)
{
  # Reports an error on the current main-input line (FNR) and
  # terminates with exit status 1.
  printf "line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

function tbl_error(file, msg, lnum)
{
  # Reports an error in table file "file" and terminates with exit
  # status 1.  "lnum" is the line number within that file; when it is
  # omitted the current FNR is reported instead.
  if (lnum == "") { lnum = FNR; }
  printf "file %s, line %s: %s\n", file, lnum, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}