Advertisement
Artashes

IP Task1

Feb 3rd, 2019
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.17 KB | None | 0 0
  1. import time
  2.  
  3.  
  4. def distance(a, b):
  5.     n, m = len(a), len(b)
  6.     if n > m:
  7.         a, b = b, a
  8.         n, m = m, n
  9.  
  10.     current_row = range(n+1)
  11.     for i in range(1, m+1):
  12.         previous_row, current_row = current_row, [i]+[0]*n
  13.         for j in range(1, n+1):
  14.             add, delete, change = previous_row[j]+1, current_row[j-1]+1, previous_row[j-1]
  15.             if a[j-1] != b[i-1]:
  16.                 change += 1
  17.             current_row[j] = min(add, delete, change)
  18.  
  19.     return current_row[n]
  20.  
  21.  
  22. def levenshtein(a, ws):
  23.     result = []
  24.     for word in ws:
  25.         result.append([distance(a, word), word])
  26.     result.sort()
  27.     return result[:10]
  28.  
  29.  
  30. def get_words():
  31.     f = open('words.txt', 'r')
  32.     main_text = f.readlines()
  33.     f.close()
  34.  
  35.     words = []
  36.     for line in main_text:
  37.         words.extend(line.replace("'", "").replace("`", "").replace("\n", "").replace("#", ",").split(","))
  38.     return set(words)
  39.  
  40.  
  41. def k_grams(k, ws):
  42.     result = {}
  43.     for word in ws:
  44.         for i in range(0, len(word) - k + 1):
  45.             gram = word[i:i + k]
  46.             if gram in result.keys():
  47.                 result[gram].add(word)
  48.             else:
  49.                 result[gram] = {word}
  50.     return result
  51.  
  52.  
  53. def create_list(a, grams, k):
  54.     result = set()
  55.     for i in range(0, len(a) - k + 1):
  56.         gram = a[i:i + k]
  57.         if gram in grams.keys():
  58.             result.update(grams[gram])
  59.     return result
  60.  
  61.  
  62. def program():
  63.     words = get_words()
  64.     k = 3
  65.  
  66.     print("-------------------------------")
  67.     print("Indexes:")
  68.     start_time = time.time()
  69.     grams = k_grams(k, words)
  70.     print("--- %s seconds ---\n" % (time.time() - start_time))
  71.  
  72.     start_time = time.time()
  73.     print("-------------------------------")
  74.     print("Levenshtein:")
  75.     print(levenshtein("афионосецы", words))
  76.     print("--- %s seconds ---\n" % (time.time() - start_time))
  77.  
  78.     start_time = time.time()
  79.     print("-------------------------------")
  80.     print("K gram:")
  81.     print(levenshtein("афионосецы", create_list("афионосецы", grams, k)))
  82.     print("--- %s seconds ---\n" % (time.time() - start_time))
  83.  
  84.  
  85. program()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement