Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time
def distance(a, b):
    """Return the Levenshtein (edit) distance between ``a`` and ``b``.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions needed to turn one sequence into the other.

    Args:
        a: First sequence (typically a ``str``); must support ``len()``.
        b: Second sequence, same requirements as ``a``.

    Returns:
        The edit distance as a non-negative ``int``.
    """
    # The distance is symmetric, so put the shorter sequence along the row:
    # only two rows of ``len(shorter) + 1`` cells are ever kept in memory.
    if len(a) > len(b):
        a, b = b, a

    # Row 0: turning an empty prefix of ``b`` into ``a[:j]`` costs ``j`` edits.
    previous_row = list(range(len(a) + 1))
    for i, item_b in enumerate(b, start=1):
        # Column 0: turning ``b[:i]`` into an empty prefix costs ``i`` edits.
        current_row = [i]
        for j, item_a in enumerate(a, start=1):
            add = previous_row[j] + 1
            delete = current_row[j - 1] + 1
            # Substitution is free when the two items already match.
            change = previous_row[j - 1] + (item_a != item_b)
            current_row.append(min(add, delete, change))
        previous_row = current_row

    return previous_row[-1]
def levenshtein(a, ws, limit=10):
    """Rank the words in ``ws`` by edit distance to ``a``.

    Args:
        a: The query string.
        ws: Iterable of candidate words.
        limit: Maximum number of results to return (defaults to 10).

    Returns:
        A list of at most ``limit`` ``[distance, word]`` pairs, sorted by
        ascending distance; ties are ordered by the word itself.
    """
    # Pairs are lists, so sorting compares the distance first, then the word.
    ranked = sorted([distance(a, word), word] for word in ws)
    return ranked[:limit]
def get_words(path="words.txt", encoding=None):
    """Load the vocabulary from a text file as a set of unique words.

    Every line is split on ``,`` and ``#`` (both act as separators);
    apostrophes, backticks and newline characters are removed first.
    Surrounding whitespace is NOT stripped from the tokens.

    Args:
        path: File to read; defaults to ``words.txt`` in the working directory.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default; pass e.g. ``"utf-8"`` to pin it.

    Returns:
        A ``set`` of the non-empty tokens found in the file.
    """
    # One pass per line: '#' becomes ',', while "'", "`" and "\n" are deleted.
    cleanup = str.maketrans("#", ",", "'`\n")
    words = set()
    # ``with`` guarantees the handle is closed even if reading fails.
    with open(path, "r", encoding=encoding) as f:
        for line in f:
            words.update(line.translate(cleanup).split(","))
    # Blank lines and trailing separators yield empty tokens, not words.
    words.discard("")
    return words
def k_grams(k, ws):
    """Build an inverted index from k-grams to the words containing them.

    Args:
        k: Length of each gram (substring window).
        ws: Iterable of words to index.

    Returns:
        A ``dict`` mapping every length-``k`` substring to the ``set`` of
        words in which it occurs. Words shorter than ``k`` contribute
        nothing.
    """
    result = {}
    for word in ws:
        # Slide a window of width ``k`` over the word.
        for i in range(len(word) - k + 1):
            result.setdefault(word[i:i + k], set()).add(word)
    return result
def create_list(a, grams, k):
    """Collect the indexed words that share at least one k-gram with ``a``.

    Args:
        a: The query string.
        grams: Mapping from k-gram to a collection of words, as produced
            by ``k_grams``.
        k: Gram length; should match the one used to build ``grams``.

    Returns:
        A new ``set`` with every word listed under any k-gram of ``a``.
        Empty when ``a`` is shorter than ``k`` or shares no gram.
    """
    result = set()
    for i in range(len(a) - k + 1):
        # Grams absent from the index simply contribute no candidates.
        result.update(grams.get(a[i:i + k], ()))
    return result
def program(query="афионосецы", k=3):
    """Time a full-vocabulary fuzzy search against a k-gram-filtered one.

    Loads the vocabulary with ``get_words``, then prints three timed
    sections: building the k-gram index, ranking the whole vocabulary
    with ``levenshtein``, and ranking only the candidates that
    ``create_list`` selects from the index.

    Args:
        query: The (possibly misspelled) word to look up.
        k: Gram length used for the index and the candidate lookup.
    """
    rule = "-------------------------------"
    words = get_words()

    print(rule)
    print("Indexes:")
    # perf_counter is monotonic, so intervals are immune to clock changes.
    started = time.perf_counter()
    grams = k_grams(k, words)
    print("--- %s seconds ---\n" % (time.perf_counter() - started))

    print(rule)
    print("Levenshtein:")
    started = time.perf_counter()
    print(levenshtein(query, words))
    print("--- %s seconds ---\n" % (time.perf_counter() - started))

    print(rule)
    print("K gram:")
    started = time.perf_counter()
    # Only words sharing a k-gram with the query are ranked here.
    print(levenshtein(query, create_list(query, grams, k)))
    print("--- %s seconds ---\n" % (time.perf_counter() - started))
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    program()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement