Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- def suggest_alternatives(w):
- """Opens words.txt and returns a list of possible corrections for word 'w'
- words.txt is the generic word dictionary for linux with a little under 100k words"""
- result = []
- words = get_words('words.txt')
- wordset = set(words)
- start = time.time()
- for word in wordset:
- #if 1 >= len(w) - len(word) >= -1: continue
- #if (len(word) > (len(w)+1)) and (len(word) < (len(w)-1)): continue
- if abs(len(w) - len(word)) > 1: continue
- distance = 0
- distance = levenshtein(w,word)
- if (distance == 1):
- result.append(word)
- print timer(start)
- return result
- def levenshtein(s1, s2):
- "Calculates the Levenshtein distance between string 1 and string 2."
- if len(s1) < len(s2):
- return levenshtein(s2, s1)
- if len(s2) == 0:
- return len(s1)
- previous_row = xrange(len(s2) + 1)
- for i, c1 in enumerate(s1):
- current_row = [i + 1]
- for j, c2 in enumerate(s2):
- insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
- deletions = current_row[j] + 1 # than s2
- substitutions = previous_row[j] + (c1 != c2)
- current_row.append(min(insertions, deletions, substitutions))
- previous_row = current_row
- return previous_row[-1]
- def timer(s):
- return "Executed in %0.5f" % float(time.time() - s)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement