#! /usr/bin/python3 # Last edited on 2019-04-20 03:37:53 by jstolfi def roman_decode(x): # Parses a string {x} as a Roman numeral and # returns its integer value. # # Understands the additive variant, so that # "IIII", "VIIII", "XXXX", etc. are mapped to 4,9,40, etc. # Also accepts the rare "doubly subtractive" convention # that writes "IIX" for 8, "XXC" for 80, and "CCM" for 800. # However does NOT accept "IIV", "XXL", or "CCD". # Mixed conventions are okay, so "CCCCLXXXIIX" is # converted to 488. # # The letters must be uppercase, and there must be no # blanks or any other extraneous character in {x}. # # The largest number accepted is "MMMMCMXCIX" = 4999. # The smallest is "I" = 1; but if {x} is the empty # string, returns 0. # # If the string {x} is not a valid Roman numeral by these # rules, returns {None}. if (type(x) != type("foo")): return None # Requires {x} to be a string. v = 0 for p in (3,2,1,0): (d,x) = roman_decode_section(x,p) if (d == None): return None v = v*10 + d if (x != ""): return None return v def roman_decode_section(x, p): # Parses the leading decimal section of the string {x}, which is # assumed to be a valid Roman numeral, all in upper case. # # The section should specify the multiple of {10^p} (thousands, # hundreds, tens, or units) in the Roman numeral. Assumes that {x} # has no higher powers of 10; that is, the value of {x} must be less # than {10^(p+1}}. # # Returns a pair {d,r} where {d} is the multiplier of {10^p} in {x}, an integer # from 1 to 9; or 0 if {x} has no {10^p} section. If the multiplier is not # valid by Roman rumeral rules (of any style), returns {d=None}. # In any case, {r} will be the unparsed part of {x}. # # For example, if {x} is "LXXIX" (79), the leading section for {p=1} # is "LXX" (70), and the procedure returns {d=7} and {r="IX"}. For # {p=2} and {p=3}, the procedure returns {d=0} and {r="LXXIX"} smb = ( "IVX", "XLC", "CDM", "MMM" )[p] # Letters allowed in this section. s = ""; # The {10^p} section, with symbols mapped to "IVX". while True: if (x == ""): break elif (x[0] == smb[0]): s = s + "I" elif (x[0] == smb[1]): s = s + "V" else: break x = x[1:] # Remove the first character from {x}. # There may still be a {smb[2]}: if ((s != "") and (x != "") and (x[0] == smb[2])): s = s + "X"; x = x[1:] # Translate {s} to a digit: tab = { "": 0, "I": 1, "II": 2, "III":3, "IIII": 4, "IV": 4, "V": 5, "VI": 6, "VII": 7, "VIII": 8, "IIX": 8, "VIIII": 9, "IX": 9 } if (s in tab): return tab[s],x else: return None, x def roman_encode(n, style): # Encodes the integer {n} as an uppercase Roman numeral. # # If {style=0} uses the ancient "additive" version, where # 4 is "IIII", 9 is "VIIII", and similarly for the # tens, hundreds, and thousands. The maximum value of {n} # is then 4999. # # If {style=1}, uses the modern "subtractive" version, # where 4 is "IV", 9 is "IX", and similarly for the other # powers of ten. The maximum value of {n} is then 3999. # # If {style=2}, uses the rare "doubly subtractive" version, # where 8 is "IIX" (not "VIII"), 80 is "XXC", and 800 is "CCM". # The maximum value of {n} is then 3999. # # If the value of {n} is negative, or exceeds the maximum, the # function returns {None}. For {n=0}, it returns the empty # string. if (type(n) != type(418)): return None # Requires {n} to be an integer. if (n < 0): return None x = "" for p in (3,2,1,0): w = 10**p d = n // w; n = n % w s = roman_encode_digit(d, p, style) if (s == None): return None x = x + s return x def roman_encode_digit(d, p, style): # Encodes the digit {d}, from 0 to 9, as the # uppercase Roman numeral with value {d*10^p}. # # The {style} parameter is as in {roman_encode}. # If {d} is zero, returns the empty string. # # For {p=3} (thousands), the maximum value of # {d} is 4 if {style=0} (additive notation), # which yields "MMMM"; and 3 otherwise # (subtractive notations) which yields "MMM". # Check range of {d}: if (d < 0): return None # Just in case. if (p == 3): if (style == 0): if (d > 4): return None else: if (d > 3): return None else: if (d > 9): return None smb = ( "IVX", "XLC", "CDM", "M??" )[p] # Letters to use in this section. s1 = smb[0]; # Units symbol. s5 = smb[1]; # Fives symbol. s10 = smb[2]; # Tens symbol. if (style == 0): if (d <= 4): return s1*d else: # {d} is 5 to 9: return s5 + (s1*(d-5)) else: # {style} is 1 or 2: if (d <= 4): return s1*d elif (d == 4): return s1 + s5 elif (d <= 8): if ((d == 8) and (style == 2)): return s1 + s1 + s10 else: return s5 + (s1*(d-5)) else: # {d} is 9: return s1 + s10 # Testing print("Testing encode + decode...") for n1 in range(5002): n = n1 - 1 print("%6d " % n, end="") for style in (0,1,2): # Set {ok} iff {n} is in valid range: nmax = (4999 if style == 0 else 3999) ok = ((n >= 0) and (n <= nmax)) # Test conversion to Roman: x = roman_encode(n, style) # Flag if {x} is {None} when it shouldn't: bug_e = ("**e" if ((x == None) == ok) else " ") if (not ok): x = "None" m = roman_decode(x) # Flag if {m} is {None} when it shouldn't: bug_d = ("**d" if ((m == None) == ok) else " ") bra_x = "[" + x + "]" print("%s %s %-22s " % (bug_e, bug_d, bra_x), end="") print("") print("Testing decode on some invalid numerals...") bad = [ "IIIII", "XXXXX", "CCCCC", "MMMMM", "VIIIII", "LXXXXX", "DCCCCC", "VVI", "LLX", "DDC", "VIV", "LXL", "DCD", "IVV", "XLL", "CDD", "IIV", "XXL", "CCD", "VIX", "LXC", "DCM", "IIIX", "XXXC", "CCCM", "IM", "XIC", "CXM", "CIM", "VV", "LL", "DD", "VX", "LC", "DM", "IC", "XM", "ID", ] for x in bad: print("[%s] = " % x, end = "") n = roman_decode(x) if (n == None): print("None") else: print("%d ** decode bug" % n) print("done.")