# Last edited on 2002-01-18 15:11:22 by stolfi

# Factors a text by placing "{}" around each basic glyph.
# Assumes that the field is encoded in EVA.
# To be included in factor-field-general etc.
#
# The including script must supply a function data_error(msg),
# which is called here to report malformed input.

function factor_text(x,   y,e)
{
  # Assumes {x} is EVA.  Factors it into basic glyphs and returns
  # the result with each glyph wrapped in braces.  Runs of the
  # characters "-", "=", "/", ",", "." and " " are copied to the
  # output as they are, without braces.
  #
  # Ligature capitalization is ignored on input and added to the output.
  #
  # Only {x} is a real parameter; {y} (the output built so far) and
  # {e} (the element just split off) are local variables.

  # Clean up any junk: brace-delimited groups and "!" characters.
  gsub(/{[^{}]*}/, "", x);
  gsub(/[!]/, "", x);

  # Make sure that the input has ligature capitalizations:
  x = uncapitalize_ligatures(x);
  x = capitalize_ligatures(x);

  # NOTE(review): "*" and "%" pass this check, but neither pattern in
  # the loop below can consume them, so a word containing one of them
  # ends up in the "duh?" branch.  Confirm how they should be handled.
  if (match(x, /[^-=\/,. *?%a-zA-Z]/))
    { data_error(("invalid char in word \"" x "\"")); }

  # Split word into basic glyphs:
  y = "";
  while (x != "")
    {
      if (match(x, /^[-=\/,. ]+/))
        {
          # A run of separator characters: copy it unchanged.
          e = substr(x, 1, RLENGTH);
          x = substr(x, RLENGTH+1);
          y = ( y e );
        }
      else if (match(x, /^([A-Z]*[a-z]|[A-Z]+[?][A-Z?]*[h]|[?][KTPFH]*[h]|[A-Z]*[?])/))
        {
          # A glyph: optional capitals ending in a lowercase letter,
          # or one of the forms containing "?".
          e = substr(x, 1, RLENGTH);
          x = substr(x, RLENGTH+1);
          y = ( y "{" e "}");
        }
      else
        {
          # Nothing matches at the start of {x}.  After a failed match
          # RLENGTH is -1, so slicing with it would leave {x} unchanged
          # and the loop would never end if data_error returns.
          # Report the problem and give up on this word, returning the
          # factored part followed by the unparsed remainder.
          data_error("duh?");
          return ( y x );
        }
    }
  return y;
}

function capitalize_ligatures(w)
{
  # Returns {w} with the ligature patterns below rewritten so that
  # every letter of the ligature except the final "h" is uppercase
  # ("?" is left as it is).  The substitutions are applied in the
  # order listed, each one to the result of the previous ones.

  # Capitalize ligatures:
  gsub(/ch/, "Ch", w);
  gsub(/sh/, "Sh", w);

  gsub(/ckh/, "CKh", w);
  gsub(/ikh/, "IKh", w);
  gsub(/[?]kh/, "?Kh", w);

  gsub(/cth/, "CTh", w);
  gsub(/ith/, "ITh", w);
  gsub(/[?]th/, "?Th", w);

  gsub(/cph/, "CPh", w);
  gsub(/iph/, "IPh", w);
  gsub(/[?]ph/, "?Ph", w);

  gsub(/cfh/, "CFh", w);
  gsub(/ifh/, "IFh", w);
  gsub(/[?]fh/, "?Fh", w);

  gsub(/c[?]h/, "C?h", w);
  gsub(/i[?]h/, "I?h", w);
  gsub(/c[?]/, "C?", w);

  gsub(/hh/, "Hh", w);
  return w;
}

function uncapitalize_ligatures(w)
{
  # Removes ligature-capitalization: returns {w} with the letters
  # C, S, I, H, K, T, P, F, Y, O and A replaced by their lowercase
  # forms.  Any other uppercase letter is left unchanged.
  gsub(/C/, "c", w);
  gsub(/S/, "s", w);
  gsub(/I/, "i", w);
  gsub(/H/, "h", w);
  gsub(/K/, "k", w);
  gsub(/T/, "t", w);
  gsub(/P/, "p", w);
  gsub(/F/, "f", w);
  gsub(/Y/, "y", w);
  gsub(/O/, "o", w);
  gsub(/A/, "a", w);
  return w;
}