# Last edited on 2002-01-20 22:35:12 by stolfi

# slot_extract(w)
#   Returns the vowel segment of the Vietnamese VIQR word {w}.
#
#   The result is {w} with every tone-mark character (one of ` ' . ? ~)
#   deleted, and with the leading and trailing runs of consonant letters
#   (any ASCII letter other than a, e, i, o, u, y, in either case) removed.
#   Every other character -- in particular the vowel-quality marks that
#   follow a vowel, e.g. "^", "(" and "+" -- is left in place.
#
#   A word made only of consonants and tone marks yields the empty string.
#   The caller's variable is not modified (awk passes scalars by value).
function slot_extract(w) {
    # Step 1: drop the tone marks, wherever they occur in the word.
    gsub(/[`'.?~]/, "", w);

    # Step 2: strip the initial consonant cluster.  The pattern is anchored
    # at the start of the string, so a single substitution is sufficient.
    sub(/^[b-df-hj-np-tvwxzB-DF-HJ-NP-TVWXZ]+/, "", w);

    # Step 3: strip the final consonant cluster (anchored at the end,
    # so again at most one match is possible).
    sub(/[b-df-hj-np-tvwxzB-DF-HJ-NP-TVWXZ]+$/, "", w);

    # Whatever remains is the vowel segment.
    return w;
}

# slot_factor(w)
#   Trivial factorization of {w}: every single character is wrapped in its
#   own pair of braces, so that "abc" becomes "{a}{b}{c}".  An empty
#   string is returned unchanged.
#
#   Relies on gensub(), which is a GNU awk extension.
function slot_factor(w) {
    w = gensub(/(.)/, "{\\1}", "g", w);
    return w;
}