# Last edited on 2000-09-21 16:51:10 by stolfi
# To be included in gawk scripts

function extract_gallows_attr(word,   w) {
  # Returns the number of gallows letters ("k", "t", "p", "f") in "word",
  # or -1 if the word contains any character other than "a".."z".
  # The empty word is valid and yields 0.
  # "w" is a local scratch variable; callers pass only "word".
  if (word ~ /[^a-z]/) {
    # printf "%20s %20s %2d\n", word, "?", -1 > "/dev/stderr";
    return(-1);
  }
  # Delete everything that is not a gallows letter; what remains is counted.
  w = word;
  gsub(/[^ktpf]/, "", w);
  # printf "%20s %20s %2d\n", word, w, length(w) > "/dev/stderr";
  return(length(w));
}

function extract_mantle_attr(word,ct2,ctplat,ctke,   w) {
  # Returns the number of table characters in "word", or -1 if the word
  # contains any character other than "a".."z".
  #   If "ct2=1", counts "ch", "sh", "ee" as 2 tablechars each.
  #   If "ctplat=1", counts gallows platforms as a "ch".
  #   If "ctke=1", counts single "e" after gallows/tablechar as tablechar.
  # "w" is a local scratch variable; callers pass only the first four
  # arguments.  The function works on a private copy of "word" and does
  # not touch the current input record.
  #
  # Working alphabet used below: "k" = any gallows, "C" = one composite
  # table character (counted as 1 or 2 depending on "ct2"), "e" = one
  # single table character.
  if (word ~ /[^a-z]/) {
    # printf "%20s %20s %2d\n", word, "?", -1 > "/dev/stderr";
    return (-1);
  }
  w = word;

  # Map every gallows letter to the generic gallows "k".
  gsub(/[ktpf]/, "k", w);

  # Normalise "ikh" to "ckh" so the platform rule below recognises it.
  # (This must act on "w"; it used to be applied to field $3, which left
  # the word unnormalised and modified the caller's input record.)
  gsub(/[i]k[h]/, "ckh", w);

  # A "c", a gallows and any following run of "h" form a platform gallows:
  # keep the platform as a "C" in front of the gallows, or discard it.
  if (ctplat) {
    gsub(/[c]k[h]*/, "Ck", w);
  } else {
    gsub(/[c]k[h]*/, "k", w);
  }

  # Resolve runs of "e" directly after a gallows, longest run first so
  # that shorter patterns cannot split a longer run.
  gsub(/k[e][e][e][e]/, "kCC", w);
  if (ctke) {
    # The odd "e" is kept next to the gallows and counted later.
    gsub(/k[e][e][e]/, "keC", w);
    gsub(/k[e][e]/, "kC", w);
    # A single "e" after a gallows is left in place as it is.
  } else {
    # The odd "e" after a gallows is not counted.
    gsub(/k[e][e][e]/, "kC", w);
    gsub(/k[e][e]/, "kC", w);
    gsub(/k[e]/, "k", w);
  }

  # Remaining two-letter combinations each become one composite "C" ...
  gsub(/[cs]h/, "C", w);
  gsub(/ee/, "C", w);
  gsub(/se/, "C", w);
  gsub(/es/, "C", w);
  # ... and any leftover "c", "e" or "h" is a single table character.
  gsub(/[ceh]/, "e", w);

  # Unless requested, a single "e" right after a composite is not counted.
  if (! ctke) {
    gsub(/Ce/, "C", w);
  }

  # Expand each composite to its weight, then count the "e" marks.
  if (ct2) {
    gsub(/C/, "ee", w);
  } else {
    gsub(/C/, "e", w);
  }
  gsub(/[^e]/, "", w);
  # printf "%20s %20s %2d\n", word, w, length(w) > "/dev/stderr";
  return(length(w));
}

function extract_e_d_attr(word,   w) {
  # Returns "-1" if the word is invalid (contains a character other
  # than "a".."z"), "1" if the word contains the "ed" digraph,
  # "0" if it doesn't.
  # "w" is an unused local, kept so the declared signature is unchanged.
  if (word ~ /[^a-z]/) {
    return(-1);
  }
  if (word ~ /[e][d]/) {
    return(1);
  }
  return(0);
}

function extract_random_attr(word,prob,   w) {
  # Returns -1 if the word is invalid (contains a character other than
  # "a".."z"), else returns a random bit which is 1 with probability "prob".
  # "w" is an unused local, kept so the declared signature is unchanged.
  if (word ~ /[^a-z]/) {
    return(-1);
  }
  # rand() may return 0 but never 1, so the strict comparison yields 1
  # with probability exactly "prob": never for prob=0, always for prob=1.
  return (rand() < prob);
}