from __future__ import division

import itertools as IT

# Letter classes used to tag the letters of a word and the first letter of a
# phoneme symbol.  "Y" is listed with the vowels and is therefore absent
# from CONSONANTS.
VOWELS = "AEIOUY"
CONSONANTS = "BCDFGHJKLMNPQRSTVWXZ"

def join_S_V(res):
    """Concatenate the tags in ``res`` and absorb every 'V' that touches an 'S'.

    ``res`` is an iterable of one-character tag strings.  After joining,
    each "SV" pair is rewritten to "S" until none is left, and then each
    "VS" pair likewise, so runs of 'V' directly after or before an 'S'
    disappear.  The resulting string is returned.
    """
    merged = "".join(res)
    # Order matters for fidelity: trailing 'V's are absorbed first, then
    # leading ones.
    for pair in ("SV", "VS"):
        while pair in merged:
            merged = merged.replace(pair, "S")
    return merged
    
def sign(word, special):
    """Return the collapsed tag signature of the spelling ``word``.

    The word is scanned left to right.  A two-letter slice equal to
    ``special`` yields the tag 'S' and consumes both letters; any other
    letter yields 'C' if it is in CONSONANTS or 'V' if it is in VOWELS.
    The tags are passed through join_S_V and consecutive duplicates are
    then collapsed.

    Characters that belong to neither letter class (apostrophes, digits,
    parentheses, ...) are skipped.  Previously such a character re-appended
    the preceding tag, which the final collapse removed again, or raised
    UnboundLocalError when it was the first character of the word.

    Args:
        word: the spelling to tag.
        special: the two-letter combination to tag as 'S'.

    Returns:
        A list of one-character tags with no two equal neighbours.
    """
    tags = []
    i = 0
    while i < len(word):
        if word[i:i+2] == special:
            tags.append('S')
            i += 2  # both letters of the special pair are consumed
            continue
        letter = word[i]
        if letter in CONSONANTS:
            tags.append('C')
        elif letter in VOWELS:
            tags.append('V')
        # Anything else carries no tag and is ignored.
        i += 1
    joined = join_S_V(tags)
    return [k for k, _ in IT.groupby(joined)]
    
def ph_sign(phon, special="EY"):
    """Return the collapsed tag signature of the phoneme sequence ``phon``.

    Only the first two characters of each symbol are inspected, so a
    trailing third character (such as the digit in "EY1") is ignored.  A
    symbol whose two-character prefix equals ``special`` is tagged 'S'.
    Otherwise it is tagged 'C' when its first letter is in CONSONANTS or
    the prefix is "ER", and 'V' when its first letter is in VOWELS.  The
    tags are passed through join_S_V and consecutive duplicates are then
    collapsed.

    Empty symbols and symbols that fit none of the classes are skipped.
    Previously an empty symbol raised IndexError, and an unclassifiable
    one re-appended the preceding tag (removed again by the collapse) or
    raised UnboundLocalError when it came first.

    Args:
        phon: iterable of phoneme symbol strings.
        special: two-character phoneme prefix to tag as 'S'.

    Returns:
        A list of one-character tags with no two equal neighbours.
    """
    tags = []
    for p in phon:
        c = p[:2]
        if not c:
            continue
        if c == special:
            tags.append('S')
        elif c[0] in CONSONANTS or c == "ER":
            # "ER" starts with a vowel letter but is tagged as a consonant.
            tags.append('C')
        elif c[0] in VOWELS:
            tags.append('V')
        # Anything else carries no tag and is ignored.
    joined = join_S_V(tags)
    return [k for k, _ in IT.groupby(joined)]
    
def match(word, phon, special):
    """Tell whether spelling and pronunciation carry 'S' at the same position.

    A leading "MC" in ``word`` is expanded to "MAC" before tagging.  The
    spelling signature (from sign, using ``special``) and the phoneme
    signature (from ph_sign with its default special) are paired position
    by position; the result is True when some pair is ('S', 'S').
    """
    if word[:2] == "MC":
        word = "MAC" + word[2:]
    spelled = sign(word, special)
    spoken = ph_sign(phon)
    return any(pair == ('S', 'S') for pair in zip(spelled, spoken))
    
def check_pos(word, phon, special):
    """Check that an "EY" phoneme lies roughly where ``special`` is spelled.

    The index of every phoneme starting with "EY" is rescaled from the
    phoneme sequence length to the word length and compared with the
    centre of the first occurrence of ``special`` in ``word`` (its start
    index plus 0.5).  Returns True when at least one rescaled index is
    within 2 of that centre, False otherwise (including when there is no
    "EY" phoneme at all).

    Raises ValueError (from str.index) if ``special`` is not in ``word``.
    """
    start = word.index(special)
    max_offset = 2
    ey_slots = [idx for idx, sym in enumerate(phon) if sym.startswith("EY")]
    if len(ey_slots) == 0:
        return False
    n_phon, n_chars = len(phon), len(word)
    target = start + 0.5
    for slot in ey_slots:
        # Project the phoneme index onto the letter scale.
        if abs(slot / n_phon * n_chars - target) <= max_offset:
            return True
    return False
    
def sounds_like(line, special):
    """Decide whether ``special`` in the entry ``line`` is pronounced "EY".

    ``line`` is a whitespace-separated entry: the word first, then its
    phoneme symbols.  The entry passes when check_pos accepts the position
    of an "EY" phoneme, at least one phoneme starts with "EY", and match
    finds 'S' tags at the same signature position.
    """
    fields = line.split()
    word, phon = fields[0], fields[1:]
    # check_pos is evaluated first, exactly as before.
    if check_pos(word, phon, special) and any(sym[:2] == "EY" for sym in phon):
        return match(word, phon, special)
    return False
    
def against_rule2(line):
    """Return True when the word in ``line`` contains "IE" that sounds like "EY".

    ``line`` is a whitespace-separated entry whose first field is the word.
    Words without "IE" yield False without consulting sounds_like.
    """
    word = line.split()[0]
    return "IE" in word and sounds_like(line, "IE")
    
def against_rule3(line):
    """Return True when the word in ``line`` has a non-"EY" "EI" with no "CEI".

    ``line`` is a whitespace-separated entry whose first field is the word.
    Words without "EI", and words containing "CEI" anywhere, yield False.
    Otherwise the result is the negation of sounds_like(line, "EI").
    """
    word = line.split()[0]
    if "EI" not in word or "CEI" in word:
        return False
    return not sounds_like(line, "EI")
    
def ibeforee_exceptions(path="cmudict_ie.txt"):
    """Print every entry of a pronunciation list that breaks "i before e".

    The following combinations are against the rule:
        1. CIE
        2. IE that sound like EY[012]
        3. EI that do not sound like EY[012] and have no C in front
    What about:
        EIE like in German names, which are on the list
        AIE like MAIER (M EY1 ER0) where the EY1 is more for AI than IE
        DIEGO (D IY0 EY1 G OW0) I and E are separate sounds
        ATHEIST where E and I are separate sounds
        DOSSIER (D AO2 S Y EY10)
        FRASIER'S  F R EY1 ZH ER0 Z EY1 is for A

    Each non-blank line of the file is a word followed by its phoneme
    symbols, separated by whitespace.  A line is reported under the first
    rule it violates, and a summary of the counts is printed at the end.

    Args:
        path: file to read; defaults to "cmudict_ie.txt" in the current
            working directory.

    Returns:
        None.  All results are written to standard output.
    """
    words = 0
    violate1 = violate2 = violate3 = 0
    # The file is opened exactly once and closed by the context manager.
    with open(path) as f:
        for raw in f:
            # split() already discards the line terminator, so nothing is
            # chopped off a final line that lacks a trailing newline.
            elements = raw.split()
            if not elements:
                continue  # blank lines carry no entry
            words += 1
            line = " ".join(elements)
            word = elements[0]
            # Single-argument print(...) emits the same text on Python 2
            # and Python 3.
            if "CIE" in word:
                print("rule 1: %s" % line)
                violate1 += 1
            elif against_rule2(line):
                print("rule 2: %s" % line)
                violate2 += 1
            elif against_rule3(line):
                print("rule 3: %s" % line)
                violate3 += 1
    summary = (
        ("number of words", words),
        ("CIE            ", violate1),
        ("IE that sound like EY[012]", violate2),
        ("EI that do not sound like EY[012] and have no C in front", violate3),
        ("total number violations", violate1 + violate2 + violate3),
    )
    for label, count in summary:
        print("%s %d" % (label, count))
    
# Module-level call: the report is produced whenever this file is executed
# (and also whenever it is imported, since the call is not guarded).
ibeforee_exceptions()