Want more features on Pastebin? Sign Up, it's FREE!
Guest

ibeforee

By: ploffie on Dec 3rd, 2013  |  syntax: Python  |  size: 3.77 KB  |  views: 35  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. from __future__ import division
  2.  
  3. import itertools as IT
  4.  
# Letters classified as vowels by sign() and ph_sign(); note that Y is included.
VOWELS = "AEIOUY"
# The remaining 20 letters of the alphabet, classified as consonants.
CONSONANTS = "BCDFGHJKLMNPQRSTVWXZ"
  7.  
  8. def join_S_V(res):
  9.     res = "".join(res)
  10.     while "SV" in res: res = res.replace("SV", "S")
  11.     while "VS" in res: res = res.replace("VS", "S")
  12.     return res
  13.    
  14. def sign(word, special):
  15.     res= []
  16.     i = 0
  17.     while i < len(word):
  18.         if word[i:i+2] == special:
  19.             x = 'S'
  20.             i += 1
  21.         elif word[i] in CONSONANTS:
  22.             x = 'C'
  23.         elif word[i] in VOWELS:
  24.             x = 'V'
  25.         i += 1
  26.         res.append(x)
  27.     res = join_S_V(res)
  28.     return [k for k,_ in IT.groupby(res)]
  29.    
  30. def ph_sign(phon, special="EY"):
  31.     res= []
  32.     for p in phon:
  33.         c = p[:2]
  34.         if  c == special:
  35.             x = 'S'
  36.         elif c[0] in CONSONANTS or c == "ER":
  37.             x = 'C'
  38.         elif c[0] in VOWELS:
  39.             x = 'V'
  40.         res.append(x)
  41.     res = join_S_V(res)
  42.     return [k for k,_ in IT.groupby(res)]
  43.    
  44. def match(word, phon, special):
  45.     if word.startswith("MC"): word = "MAC" + word[2:]
  46.     s1 = sign(word, special)
  47.     s2 = ph_sign(phon)
  48.     return ('S', 'S') in zip(s1,s2)
  49.    
  50. def check_pos(word, phon, special):
  51.     ind = word.index(special)
  52.     tol = 2
  53.     plen = len(phon)
  54.     wlen = len(word)
  55.     pind = [i for i, p in enumerate(phon) if p.startswith("EY")]
  56.     if not pind:
  57.         return False
  58.     wordpos = (ind + 0.5)
  59.     phonpos = [i / plen * wlen for i in pind]
  60.     return any(abs(p - wordpos) <= tol for p in phonpos)
  61.    
  62. def sounds_like(line, special):
  63.     e = line.split()
  64.     word = e[0]
  65.     phon = e[1:]
  66.     if not check_pos(word, phon, special):
  67.         return False
  68.     if not any(p[:2] == "EY" for p in phon):
  69.         return False
  70.     return match(word, phon, special)
  71.    
  72. def against_rule2(line):
  73.     word = line.split()[0]
  74.     ind = word.find("IE")
  75.     if  ind == -1:
  76.         return False
  77.     return sounds_like(line, "IE")
  78.    
  79. def against_rule3(line):
  80.     word = line.split()[0]
  81.     ind = word.find("EI")
  82.     if  ind == -1:
  83.         return False
  84.     if word.find("CEI") >= 0:
  85.         return False
  86.     return not sounds_like(line, "EI")
  87.    
  88. def ibeforee_exceptions():
  89.     """ the following combinations are against the rule:
  90.        1. CIE
  91.        2. IE that sound like EY[012]
  92.        3. EI that do not sound like EY[012] and have no C in front
  93.        What about:
  94.            EIE like in German names, which are on the list
  95.            AIE like MAIER (M EY1 ER0) where the EY1 is more for AI than IE
  96.            DIEGO (D IY0 EY1 G OW0) I and E are separate sounds
  97.            ATHEIST where E and I are separate sounds
  98.            DOSSIER (D AO2 S Y EY10)
  99.            FRASIER'S  F R EY1 ZH ER0 Z EY1 is for A
  100.    """
  101.     f = open("cmudict_ie.txt")
  102.     words = 0
  103.     violate1 = violate2 = violate3 = 0
  104.     with open("cmudict_ie.txt") as f:
  105.             for line in f:
  106.                 words += 1
  107.                 line = line[:-1]
  108.                 elements = line.strip().split()
  109.                 line = " ".join(elements)
  110.                 word = elements[0]
  111.                 if "CIE" in word:
  112.                     print "rule 1:", line
  113.                     violate1 += 1
  114.                 elif against_rule2(line):
  115.                     print "rule 2:", line
  116.                     violate2 += 1
  117.                 elif against_rule3(line):
  118.                     print "rule 3:", line
  119.                     violate3 += 1
  120.             print "number of words", words
  121.             print "CIE            ", violate1
  122.             print "IE that sound like EY[012]", violate2
  123.             print "EI that do not sound like EY[012] and have no C in front", violate3
  124.             print "total number violations", violate1 + violate2 + violate3
  125.    
# Run the analysis over the default word list as soon as this file is executed.
ibeforee_exceptions()
clone this paste RAW Paste Data