Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Soundex digit for every consonant that carries one.  Letters missing from
# this table (the vowels, "y", "h", "w" and any non-English letter) have no
# digit of their own.
_SOUNDEX_CODES = {
    letter: digit
    for digit, group in (
        ("1", "bfpv"),
        ("2", "cgjkqsxz"),
        ("3", "dt"),
        ("4", "l"),
        ("5", "mn"),
        ("6", "r"),
    )
    for letter in group
}


def soundex(code):
    """Return the four-character American Soundex code of a word.

    The result is the first letter of the word (upper-cased) followed by up
    to three digits, right-padded with "0" to a total length of four, e.g.
    ``soundex("Robert") == "R163"``.

    Rules applied:

    * Only alphabetic characters are considered; apostrophes, digits and
      other punctuation are dropped before encoding.
    * Adjacent letters sharing a digit are encoded once.  This includes the
      first letter: a following letter with the same digit is skipped.
    * "h" and "w" are transparent: letters with the same digit separated
      only by "h"/"w" still count as adjacent.
    * Vowels (and any other letter without a digit) are not encoded but do
      separate equal digits, so the digit may appear again after them.

    Args:
        code: The word to encode (a ``str``; matching is case-insensitive).

    Returns:
        The four-character code, or ``"0000"`` when the input contains no
        alphabetic character at all.
    """
    letters = [ch for ch in code.lower() if ch.isalpha()]
    if not letters:
        return "0000"

    first = letters[0]
    digits = []
    # Digit of the most recent letter that was not "h"/"w"; None after a
    # vowel, which is what allows the same digit to be emitted again.
    previous = _SOUNDEX_CODES.get(first)
    for ch in letters[1:]:
        if ch in "hw":
            # Transparent: leave `previous` alone so that e.g. the "s" and
            # "c" of "Ashcraft" collapse into a single digit.
            continue
        digit = _SOUNDEX_CODES.get(ch)
        if digit is not None and digit != previous:
            digits.append(digit)
            if len(digits) == 3:
                break
        previous = digit

    return (first + "".join(digits)).ljust(4, "0").capitalize()
def test_soundex():
    """Check soundex() against a table of names and their expected codes.

    Raises AssertionError, naming the word, the returned code and the
    expected code, on the first mismatch.
    """
    expected_codes = {
        'Robert': 'R163',
        'Rupert': 'R163',
        'Rubin': 'R150',
        'Ashcraft': 'A261',
        'Ashcroft': 'A261',
        'Tymczak': 'T522',
        'Pfister': 'P236',
        'Honeyman': 'H555',
        'Burroughs': 'B620',
        'Burrows': 'B620',
        'Ciondecks': 'C532',
        'Ellery': 'E460',
        'Euler': 'E460',
        'Example': 'E251',
        'Gauss': 'G200',
        'Ghosh': 'G200',
        'Heilbronn': 'H416',
        'Hilbert': 'H416',
        'Kant': 'K530',
        'Knuth': 'K530',
        'Ladd': 'L300',
        'Lissajous': 'L222',
        'Lloyd': 'L300',
        'Lukasiewicz': 'L222',
        'O\'Hara': 'O600',
        'Soundex': 'S532',
        'Wheaton': 'W350',
    }
    # Dicts keep insertion order, so the names are checked in table order.
    for word, expected in expected_codes.items():
        actual = soundex(word)
        assert actual == expected, \
            'soundex("%s") returns "%s", should be "%s"' % (word, actual, expected)
def validate(code):
    """Print every word of the text that is missing from ``words.txt``.

    The vocabulary is read from ``words.txt`` in the current working
    directory as whitespace-separated words, compared case-insensitively.
    Before lookup each line has ".", ",", "(" and ")" removed and "-"
    replaced by a space, and is lower-cased and split on whitespace.

    For each occurrence of an unknown word one message is printed, listing
    the vocabulary words that share its Soundex code, in vocabulary order.

    Args:
        code: Iterable of text lines (for example ``file.readlines()``).

    Returns:
        None; the report goes to standard output.
    """
    # Context manager so the vocabulary file is closed once it has been read.
    with open('words.txt') as words:
        words_list = words.read().lower().split()
    # Set for O(1) membership tests; the list is kept because it fixes the
    # order in which suggestions are reported.
    known_words = set(words_list)

    # Vocabulary grouped by Soundex code.  Built on the first unknown word so
    # that a text without unknown words never pays for encoding the whole
    # vocabulary, and built only once instead of once per unknown word.
    words_by_code = None

    # "-" becomes a space so hyphenated compounds are checked part by part;
    # the remaining punctuation is simply deleted.
    cleanup = str.maketrans({".": None, ",": None, ")": None, "(": None, "-": " "})

    # NOTE(review): the reported line number is the 0-based index of the
    # line within `code` -- confirm whether 1-based numbering is wanted.
    for line_index, line in enumerate(code):
        for word in line.translate(cleanup).lower().split():
            if word in known_words:
                continue
            if words_by_code is None:
                words_by_code = {}
                for candidate in words_list:
                    words_by_code.setdefault(soundex(candidate), []).append(candidate)
            suggestions = words_by_code.get(soundex(word), [])
            print("Found unknown word %s in line %s. Suggestions: %s" % (word, line_index, ", ".join(suggestions)))
# Script body: spell-check every line of input.txt from the working directory.
# NOTE: this sits at module top level, so it also runs when the file is imported.
with open('input.txt') as input_file:
    input_lines = input_file.readlines()
    validate(input_lines)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement