Advertisement
Guest User

Untitled

a guest
May 12th, 2017
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.87 KB | None | 0 0
  # Find pairs of similar documents.
  #
  # Inputs expected in scope: `documents` (a sequence of texts) and
  # `min_percent` (a threshold expressed as a percentage, e.g. 80 or "80").
  # Output: `clean_result`, a dict mapping a document index to a list of
  # (other_index, score) tuples, with each similar pair reported only once.

  # One TF-IDF row per document.
  tfidf = TfidfVectorizer().fit_transform(documents)
  # Convert the percentage into a fraction comparable with the kernel scores.
  min_percent = int(min_percent) / 100.0
  row_length = len(documents)

  # result[i] -> [(n, score), ...] for every other document n whose score
  # against document i reaches the threshold.
  result = {}

  for i in range(row_length):
      # Score document i against all documents, one row at a time.
      # NOTE(review): with TfidfVectorizer's default settings this dot
      # product should equal cosine similarity -- confirm against sklearn docs.
      row = linear_kernel(tfidf[i:i + 1], tfidf).flatten()
      for n in range(row_length):
          # Skip the document's score against itself.
          if i != n and row[n] >= min_percent:
              result.setdefault(i, []).append((n, row[n]))

  # Postprocessing
  # `result` lists every match in both directions (i -> n and n -> i).
  # Keep the entry under the key visited first and drop the reverse one.
  clean_result = deepcopy(result)
  # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
  # We iterate the untouched `result`, so mutating `clean_result` is safe.
  for key, values in result.items():
      if key not in clean_result:
          continue
      for value in values:
          other = value[0]
          if value in clean_result[key] and other in clean_result:
              # Build a filtered list rather than removing from the list
              # while iterating over it.
              remaining = [
                  pair for pair in clean_result[other] if pair[0] != key
              ]
              if remaining:
                  clean_result[other] = remaining
              else:
                  # Nothing left for `other`: drop the empty entry entirely.
                  clean_result.pop(other)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement