# Last edited on 2002-01-16 01:51:27 by stolfi # Defines functions to perform Roman-like number encoding and decoding. # To be included in gawk programs with the sub-package roman-encoding-XXX.gawk, # for some XXX. The client must call renc_init() from that sub-package # before any use of renc_encode or renc_decode. function renc_encode(num, n,res,d0,d1,d2,d3) { n = num; d0 = (n % renc_base0); n = (n - d0)/renc_base0; d1 = (n % renc_base1); n = (n - d1)/renc_base1; d2 = (n % renc_base2); n = (n - d2)/renc_base2; d3 = (n % renc_base3); n = (n - d3)/renc_base3; d4 = (n % renc_base4); n = (n - d4)/renc_base4; if (n != 0) { renc_error(("index " num " is too big to encode")); } res = ""; res = renc_add_digit(res, renc_digs4[d4]); res = renc_add_digit(res, renc_digs3[d3]); res = renc_add_digit(res, renc_digs2[d2]); res = renc_add_digit(res, renc_digs1[d1]); res = renc_add_digit(res, renc_digs0[d0]); return res; } function renc_add_digit(code,dgcode, fld) { # Adds a digit "dgcode" around the given "code" word. # "dgcode" must have a ":" in it that specifies where to insert "code". split(dgcode, fld, ":"); return ( fld[1] code fld[2] ); } function renc_decode(code,errcode, c,num,d0,d1,d2,d3) { # If the "code" is a valid code, returns its numeric value. # Otherwise returns the arbitrary string "errcode". c = ( code ":0" ) c = renc_remove_digit(c, renc_digs0, renc_mul0); c = renc_remove_digit(c, renc_digs1, renc_mul1); c = renc_remove_digit(c, renc_digs2, renc_mul2); c = renc_remove_digit(c, renc_digs3, renc_mul3); c = renc_remove_digit(c, renc_digs4, renc_mul4); if (c !~ /[:]/) { return errcode; } return 0 + substr(c,2); } function renc_remove_digit(cdn,digs,mul, \ code,num,ndgmax,skip,dgval,nc,i,dgcode,ndg,fld,npr,nsf \ ) { # The "cdn" argument should have the form "code:num" where "code" is a # codeword and "num" a decimal number. Removes a maximal-length # prefix-suffix pair from of "code" that matches some digit pattern # "digs[i]". Then returns "codeN:numN" where "codeN" is "code" with # "digs[i]" removed, "numN = num = mul*i". Each digit pattern must # have a ":" separating the prefix and suffix parts. split(cdn,fld,":"); code = fld[1]; num = fld[2]; ndgmax = -1; skip = -1; dgval = -1; nc = length(code); for (i in digs) { dgcode = digs[i]; ndg = length(dgcode) - 1; if (ndg <= nc) { split(dgcode, fld, ":"); npr = length(fld[1]); nsf = length(fld[2]); if ((substr(code,1,npr) == fld[1]) && (substr(code,nc-nsf+1) == fld[2])) { if (ndg > ndgmax) { ndgmax = ndg; skip = npr; dgval = i; } } } } if (ndgmax >= 0) { return (substr(code, skip+1, nc-ndgmax) ":" num + mul*dgval); } else { # renc_error(("no digit matches \"" code "\"")); return ""; } } function renc_error(msg) { printf "%s\n", msg > "/dev/stderr"; abort = 1; exit 1 }