Advertisement
ploffie

ibeforee

Dec 3rd, 2013
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.77 KB | None | 0 0
  1. from __future__ import division
  2.  
  3. import itertools as IT
  4.  
  5. VOWELS = "AEIOUY"
  6. CONSONANTS = "BCDFGHJKLMNPQRSTVWXZ"
  7.  
  8. def join_S_V(res):
  9.     res = "".join(res)
  10.     while "SV" in res: res = res.replace("SV", "S")
  11.     while "VS" in res: res = res.replace("VS", "S")
  12.     return res
  13.    
  14. def sign(word, special):
  15.     res= []
  16.     i = 0
  17.     while i < len(word):
  18.         if word[i:i+2] == special:
  19.             x = 'S'
  20.             i += 1
  21.         elif word[i] in CONSONANTS:
  22.             x = 'C'
  23.         elif word[i] in VOWELS:
  24.             x = 'V'
  25.         i += 1
  26.         res.append(x)
  27.     res = join_S_V(res)
  28.     return [k for k,_ in IT.groupby(res)]
  29.    
  30. def ph_sign(phon, special="EY"):
  31.     res= []
  32.     for p in phon:
  33.         c = p[:2]
  34.         if  c == special:
  35.             x = 'S'
  36.         elif c[0] in CONSONANTS or c == "ER":
  37.             x = 'C'
  38.         elif c[0] in VOWELS:
  39.             x = 'V'
  40.         res.append(x)
  41.     res = join_S_V(res)
  42.     return [k for k,_ in IT.groupby(res)]
  43.    
  44. def match(word, phon, special):
  45.     if word.startswith("MC"): word = "MAC" + word[2:]
  46.     s1 = sign(word, special)
  47.     s2 = ph_sign(phon)
  48.     return ('S', 'S') in zip(s1,s2)
  49.    
  50. def check_pos(word, phon, special):
  51.     ind = word.index(special)
  52.     tol = 2
  53.     plen = len(phon)
  54.     wlen = len(word)
  55.     pind = [i for i, p in enumerate(phon) if p.startswith("EY")]
  56.     if not pind:
  57.         return False
  58.     wordpos = (ind + 0.5)
  59.     phonpos = [i / plen * wlen for i in pind]
  60.     return any(abs(p - wordpos) <= tol for p in phonpos)
  61.    
  62. def sounds_like(line, special):
  63.     e = line.split()
  64.     word = e[0]
  65.     phon = e[1:]
  66.     if not check_pos(word, phon, special):
  67.         return False
  68.     if not any(p[:2] == "EY" for p in phon):
  69.         return False
  70.     return match(word, phon, special)
  71.    
  72. def against_rule2(line):
  73.     word = line.split()[0]
  74.     ind = word.find("IE")
  75.     if  ind == -1:
  76.         return False
  77.     return sounds_like(line, "IE")
  78.    
  79. def against_rule3(line):
  80.     word = line.split()[0]
  81.     ind = word.find("EI")
  82.     if  ind == -1:
  83.         return False
  84.     if word.find("CEI") >= 0:
  85.         return False
  86.     return not sounds_like(line, "EI")
  87.    
  88. def ibeforee_exceptions():
  89.     """ the following combinations are against the rule:
  90.        1. CIE
  91.        2. IE that sound like EY[012]
  92.        3. EI that do not sound like EY[012] and have no C in front
  93.        What about:
  94.            EIE like in German names, which are on the list
  95.            AIE like MAIER (M EY1 ER0) where the EY1 is more for AI than IE
  96.            DIEGO (D IY0 EY1 G OW0) I and E are separate sounds
  97.            ATHEIST where E and I are separate sounds
  98.            DOSSIER (D AO2 S Y EY10)
  99.            FRASIER'S  F R EY1 ZH ER0 Z EY1 is for A
  100.    """
  101.     f = open("cmudict_ie.txt")
  102.     words = 0
  103.     violate1 = violate2 = violate3 = 0
  104.     with open("cmudict_ie.txt") as f:
  105.             for line in f:
  106.                 words += 1
  107.                 line = line[:-1]
  108.                 elements = line.strip().split()
  109.                 line = " ".join(elements)
  110.                 word = elements[0]
  111.                 if "CIE" in word:
  112.                     print "rule 1:", line
  113.                     violate1 += 1
  114.                 elif against_rule2(line):
  115.                     print "rule 2:", line
  116.                     violate2 += 1
  117.                 elif against_rule3(line):
  118.                     print "rule 3:", line
  119.                     violate3 += 1
  120.             print "number of words", words
  121.             print "CIE            ", violate1
  122.             print "IE that sound like EY[012]", violate2
  123.             print "EI that do not sound like EY[012] and have no C in front", violate3
  124.             print "total number violations", violate1 + violate2 + violate3
  125.    
  126. ibeforee_exceptions()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement