Advertisement
oleh_korkh

Untitled

Jan 9th, 2018
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.16 KB | None | 0 0
  1. def soundex(code):
  2.     old_word = list(code.lower())
  3.     code_word = [0]*len(old_word)
  4.     new_word = ['0']*len(old_word)
  5.  
  6.  
  7.     remove_list = ['a', 'e', 'i', 'o', 'u', 'y', 'h', 'w']
  8.     c1 = ['b','f','p','v']
  9.     c2 = ['c', 'g', 'j', 'k', 'q', 's', 'x', 'z']
  10.     c3 = ['d', 't']
  11.     c4 = ['l']
  12.     c5 = ['m', 'n']
  13.     c6 = ['r']
  14.  
  15.  
  16.     for i, j in enumerate(old_word):
  17.         if j in c1:
  18.             code_word[i] = "1"
  19.         elif j in c2:
  20.             code_word[i] = "2"
  21.         elif j in c3:
  22.             code_word[i] = "3"
  23.         elif j in c4:
  24.             code_word[i] = "4"
  25.         elif j in c5:
  26.             code_word[i] = "5"
  27.         elif j in c6:
  28.             code_word[i] = "6"
  29.         else:
  30.             code_word[i] = j
  31.  
  32.     # print(code_word)
  33.  
  34.     for i, j in enumerate(old_word):
  35.         if i == 0:
  36.             new_word[i] = j
  37.         elif i == 1:
  38.             if code_word[i-1] == code_word[i]:
  39.                 new_word[i] = "0"
  40.             elif j in c1 or j in c2 or j in c3 or j in c4 or j in c5 or j in c6:
  41.                 """ Ще один спосіб записати умову вище:
  42.  
  43.                    elif j in c1+c2+c3+c4+c5+c6:
  44.                """
  45.                 new_word[i] = code_word[i]
  46.         elif j in remove_list:
  47.             if j == "h" or j == "w":
  48.                 """ Коротша форма запису:
  49.                    if j in 'hw':
  50.                """
  51.                 try:
  52.                     if code_word[i-1] == code_word[i+1]:
  53.                         new_word[i-1] = "0"
  54.                         new_word[i+1] = code_word[i+1]
  55.                     new_word[i] = "0"
  56.                 except IndexError:
  57.                     break
  58.             else:
  59.                 new_word[i] = "0"
  60.         elif j in c1 or j in c2 or j in c3 or j in c4 or j in c5 or j in c6:
  61.             if code_word[i] == code_word[i-1]:
  62.                 new_word[i-1] = "0"
  63.                 new_word[i] = code_word[i]
  64.             else:
  65.                 new_word[i] = code_word[i]
  66.  
  67.     result = list("".join(new_word).replace("0", ""))
  68.  
  69.     if len(result) > 4:
  70.         result = result[:4]
  71.     elif len(result) < 4:
  72.         for i in range(4-len(result)):
  73.             result.append("0")
  74.  
  75.     # print("".join(result).capitalize())
  76.     return "".join(result).capitalize()
  77.  
  78.  
  79. def test_soundex():
  80.     tests = (
  81.         ('Robert', 'R163'),
  82.         ('Rupert', 'R163'),
  83.         ('Rubin', 'R150'),
  84.         ('Ashcraft', 'A261'),
  85.         ('Ashcroft', 'A261'),
  86.         ('Tymczak', 'T522'),
  87.         ('Pfister', 'P236'),
  88.         ('Honeyman', 'H555'),
  89.         ('Burroughs', 'B620'),
  90.         ('Burrows', 'B620'),
  91.         ('Ciondecks', 'C532'),
  92.         ('Ellery', 'E460'),
  93.         ('Euler', 'E460'),
  94.         ('Example', 'E251'),
  95.         ('Gauss', 'G200'),
  96.         ('Ghosh', 'G200'),
  97.         ('Heilbronn', 'H416'),
  98.         ('Hilbert', 'H416'),
  99.         ('Kant', 'K530'),
  100.         ('Knuth', 'K530'),
  101.         ('Ladd', 'L300'),
  102.         ('Lissajous', 'L222'),
  103.         ('Lloyd', 'L300'),
  104.         ('Lukasiewicz', 'L222'),
  105.         ('O\'Hara', 'O600'),
  106.         ('Soundex', 'S532'),
  107.         ('Wheaton', 'W350'),
  108.     )
  109.  
  110.     for w, c in tests:
  111.         r = soundex(w)
  112.         assert r == c, \
  113.             'soundex("%s") returns "%s", should be "%s"' % (w, r, c)
  114.  
  115.  
  116. def validate(code):
  117.     words = open('words.txt')
  118.     words_list = words.read().lower().split()
  119.     text_list = []
  120.  
  121.     for i in code:
  122.         i = i.replace(".", "")
  123.         i = i.replace(",", "")
  124.         i = i.replace("-", " ")
  125.         i = i.replace(")", "")
  126.         i = i.replace("(", "")
  127.         i = i.strip()
  128.         text_list.append(i.lower().split())
  129.  
  130.     for i, l in enumerate(text_list):
  131.         for j in l:
  132.             if j not in words_list:
  133.                 suggestions = []
  134.                 for w in words_list:
  135.                     if soundex(j) == soundex(w):
  136.                         suggestions.append(w)
  137.                 # print(suggestions)
  138.                 print("Found unknown word %s in line %s. Suggestions: %s" % (j, i, ", ".join(suggestions)))
  139.  
  140.  
  141.     # print(words_list)
  142.     # print(text_list)
  143.  
  144.  
  145. with open('input.txt') as f:
  146.     validate(f.readlines())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement