#! /usr/bin/python3
# Last edited on 2019-04-20 03:37:53 by jstolfi

def roman_decode(x):
    # Parses a string {x} as a Roman numeral and 
    # returns its integer value. 
    #
    # Understands the additive variant, so that
    # "IIII", "VIIII", "XXXX", etc. are mapped to 4,9,40, etc.
    # Also accepts the rare "doubly subtractive" convention
    # that writes "IIX" for 8, "XXC" for 80, and "CCM" for 800.
    # However does NOT accept "IIV", "XXL", or "CCD".
    # Mixed conventions are okay, so "CCCCLXXXIIX" is
    # converted to 488.  
    #
    # The letters must be uppercase, and there must be no
    # blanks or any other extraneous character in {x}.
    # 
    # The largest number accepted is "MMMMCMXCIX" = 4999.
    # The smallest is "I" = 1; but if {x} is the empty
    # string, returns 0.
    # 
    # If the string {x} is not a valid Roman numeral by these
    # rules, returns {None}.

    if (type(x) != type("foo")): return None # Requires {x} to be a string.
    v = 0
    for p in (3,2,1,0): 
        (d,x) = roman_decode_section(x,p)
        if (d == None): return None
        v = v*10 + d
    if (x != ""): return None
    return v

def roman_decode_section(x, p):
    # Parses the leading decimal section of the string {x}, which is
    # assumed to be a valid Roman numeral, all in upper case. 
    #
    # The section should specify the multiple of {10^p} (thousands,
    # hundreds, tens, or units) in the Roman numeral. Assumes that {x}
    # has no higher powers of 10; that is, the value of {x} must be less
    # than {10^(p+1}}.
    #
    # Returns a pair {d,r} where {d} is the multiplier of {10^p} in {x}, an integer
    # from 1 to 9; or 0 if {x} has no {10^p} section. If the multiplier is not
    # valid by Roman rumeral rules (of any style), returns {d=None}.
    # In any case, {r} will be the unparsed part of {x}.
    # 
    # For example, if {x} is "LXXIX" (79), the leading section for {p=1} 
    # is "LXX" (70), and the procedure returns {d=7} and {r="IX"}.  For 
    # {p=2} and {p=3}, the procedure returns {d=0} and {r="LXXIX"}

    smb = ( "IVX", "XLC", "CDM", "MMM" )[p] # Letters allowed in this section. 
    s = "";    # The {10^p} section, with symbols mapped to "IVX".
    while True:
        if (x == ""):
            break
        elif (x[0] == smb[0]): 
            s = s + "I"
        elif (x[0] == smb[1]):
            s = s + "V"
        else:
            break
        x = x[1:] # Remove the first character from {x}.
    # There may still be a {smb[2]}:
    if ((s != "") and (x != "") and (x[0] == smb[2])):
        s = s + "X"; x = x[1:]
    # Translate {s} to a digit:
    tab = { 
        "": 0, "I": 1, "II": 2, "III":3, 
        "IIII": 4, "IV": 4,
        "V": 5, "VI": 6, "VII": 7,
        "VIII": 8, "IIX": 8,
        "VIIII": 9, "IX": 9
    }
    if (s in tab):
        return tab[s],x
    else:
        return None, x

def roman_encode(n, style):
    # Encodes the integer {n} as an uppercase Roman numeral.
    #
    # If {style=0} uses the ancient "additive" version, where
    # 4 is "IIII", 9 is "VIIII", and similarly for the
    # tens, hundreds, and thousands.  The maximum value of {n}
    # is then 4999.
    #
    # If {style=1}, uses the modern "subtractive" version,
    # where 4 is "IV", 9 is "IX", and similarly for the other
    # powers of ten.  The maximum value of {n} is then 3999.
    #
    # If {style=2}, uses the rare "doubly subtractive" version,
    # where 8 is "IIX" (not "VIII"), 80 is "XXC", and 800 is "CCM".
    # The maximum value of {n} is then 3999.
    #
    # If the value of {n} is negative, or exceeds the maximum, the
    # function returns {None}.  For {n=0}, it returns the empty
    # string.

    if (type(n) != type(418)): return None # Requires {n} to be an integer.
    if (n < 0): return None
    x = ""
    for p in (3,2,1,0):
        w = 10**p
        d = n // w; n = n % w
        s = roman_encode_digit(d, p, style)
        if (s == None): return None
        x = x + s
    return x

def roman_encode_digit(d, p, style):
    # Encodes the digit {d}, from 0 to 9, as the
    # uppercase Roman numeral with value {d*10^p}.
    #
    # The {style} parameter is as in {roman_encode}.
    # If {d} is zero, returns the empty string.
    #
    # For {p=3} (thousands), the maximum value of
    # {d} is 4 if {style=0} (additive notation),
    # which yields "MMMM"; and 3 otherwise
    # (subtractive notations) which yields "MMM".

    # Check range of {d}:
    if (d < 0): return None     # Just in case.
    if (p == 3):
        if (style == 0):
            if (d > 4): return None
        else:
            if (d > 3): return None
    else:
        if (d > 9): return None

    smb = ( "IVX", "XLC", "CDM", "M??" )[p] # Letters to use in this section.
    s1 = smb[0];  # Units symbol.
    s5 = smb[1];  # Fives symbol.
    s10 = smb[2]; # Tens symbol.
    if (style == 0):
        if (d <= 4):
            return s1*d
        else: # {d} is 5 to 9:
            return s5 + (s1*(d-5))
    else: # {style} is 1 or 2:
        if (d <= 4):
            return s1*d
        elif (d == 4):
            return s1 + s5
        elif (d <= 8):
            if ((d == 8) and (style == 2)):
                return s1 + s1 + s10
            else:
                return s5 + (s1*(d-5))
        else: # {d} is 9:
            return s1 + s10

# Testing
print("Testing encode + decode...")
for n1 in range(5002):
    n = n1 - 1
    print("%6d " % n, end="")
    for style in (0,1,2):
        # Set {ok} iff {n} is in valid range:
        nmax = (4999 if style == 0 else 3999)
        ok = ((n >= 0) and (n <= nmax))
        # Test conversion to Roman:
        x = roman_encode(n, style)
        # Flag if {x} is {None} when it shouldn't:
        bug_e = ("**e" if ((x == None) == ok) else "   ") 
        if (not ok): x = "None"
        m = roman_decode(x)
        # Flag if {m} is {None} when it shouldn't:
        bug_d = ("**d" if ((m == None) == ok) else "   ") 
        bra_x = "[" + x + "]"
        print("%s %s %-22s " % (bug_e, bug_d, bra_x), end="")
    print("")
print("Testing decode on some invalid numerals...")
bad = [
    "IIIII", "XXXXX", "CCCCC", "MMMMM",
    "VIIIII", "LXXXXX", "DCCCCC", 
    "VVI",   "LLX", "DDC", 
    "VIV",   "LXL", "DCD",
    "IVV",   "XLL", "CDD",
    "IIV",   "XXL", "CCD",
    "VIX",   "LXC", "DCM",
    "IIIX",  "XXXC", "CCCM",
    "IM", 
    "XIC",   "CXM",  "CIM",
    "VV",    "LL",   "DD",
    "VX",    "LC",   "DM",
    "IC",    "XM",
    "ID",    
]
for x in bad:
    print("[%s] = " % x, end = "")
    n = roman_decode(x)
    if (n == None):
        print("None")
    else:
        print("%d ** decode bug" % n)
print("done.")