# Functions for textual equivalence
# To be included by gawk scripts

# ========================================================================
# Functions for finding similar words:

function weq_reduce(txt)
{
  # Reduces a word or phrase "txt" to a pattern and returns that pattern.
  # Assumes all word separators have been mapped to ".".
  # Assumes the following global variables:
  #
  #   equatekt: TRUE to map "t" to "k"
  #   equatepf: TRUE to map "f" to "p"
  #   stripq:   TRUE to remove word-initial "q"
  #
  # Independently of those flags, the function also:
  #   - maps "y" to "o" when word-initial, word-final, or right after "q";
  #   - rewrites runs of "e": "eeee" -> "chch", "eee" -> "che", "ee" -> "ch";
  #   - drops leading/trailing "." and collapses runs of "." into one.

  # Pad with separators so that word-initial and word-final rules also
  # apply to the first and last word of the phrase.
  txt = ("." txt ".");

  # Optional equivalences. The "q" stripping runs first, so the
  # word-initial "y" rule below also sees a "y" exposed by a removed "q".
  if (stripq)   { gsub(/[.]q/, ".", txt); }
  if (equatekt) { gsub(/t/, "k", txt); }
  if (equatepf) { gsub(/f/, "p", txt); }

  # "y" -> "o" in the three recognized contexts.
  gsub(/[.]y/, ".o", txt);
  gsub(/qy/, "qo", txt);
  gsub(/[y][.]/, "o.", txt);

  # Runs of "e", longest first so shorter rules only see what is left over.
  gsub(/eeee/, "chch", txt);
  gsub(/eee/, "che", txt);
  gsub(/ee/, "ch", txt);

  # Normalize separators: no leading/trailing ".", no repeated ".".
  gsub(/^[.][.]*/, "", txt);
  gsub(/[.][.]*$/, "", txt);
  gsub(/[.][.][.]*/, ".", txt);

  return txt;
}

# ========================================================================
# Functions for erasing comments:

function weq_erase_comments(old,    new, i)
{
  # Returns "old" with every '{...}' comment removed.
  # "new" and "i" are local variables; callers pass only "old".
  #
  # A comment is a "{", any characters other than braces, and a "}".
  # If a "{" does not start such a comment (unclosed, or containing a
  # nested "{"), a diagnostic with the current record number NR is
  # written to /dev/stderr and the rest of the text, from that "{"
  # onward, is kept unchanged.

  new = "";
  while (length(old) != 0)
    {
      i = index(old, "{");
      if (i == 0)
        {
          # No more comments: keep the remainder as is.
          new = (new old);
          old = "";
        }
      else if (i > 1)
        {
          # Keep the plain text that precedes the next "{".
          new = (new substr(old, 1, i - 1));
          old = substr(old, i);
        }
      else
        {
          # "old" starts with "{". The braces are written as bracket
          # expressions so they can never be parsed as an interval
          # expression by the regex engine.
          match(old, /^[{][^{}]*[}]/);
          if (RSTART > 0)
            {
              # Well-formed comment: drop it.
              old = substr(old, RLENGTH + 1);
            }
          else
            {
              printf "line %d, bad {}-comment\n", NR > "/dev/stderr";
              new = (new old);
              old = "";
            }
        }
    }
  return new;
}