Advertisement
Guest User

Untitled

a guest
Oct 15th, 2020
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.45 KB | None | 0 0
  1.  
  2. def fast_knn(elements, k, neighborhood_fraction=.01, metric='euclidean'):
  3.  
  4.     # Finds the indices of k nearest neighbors for each sample in a matrix,
  5.     # using any of the standard scipy distance metrics.
  6.  
  7.     nearest_neighbors = np.zeros((elements.shape[0], k), dtype=int)
  8.     complete = np.zeros(elements.shape[0], dtype=bool)
  9.  
  10.     neighborhood_size = max(
  11.         k * 3, int(elements.shape[0] * neighborhood_fraction))
  12.     anchor_loops = 0
  13.  
  14.     while np.sum(complete) < complete.shape[0]:
  15.  
  16.         anchor_loops += 1
  17.  
  18.         available = np.arange(complete.shape[0])[~complete]
  19.         np.random.shuffle(available)
  20.         anchors = available[:int(complete.shape[0] / neighborhood_size) * 3]
  21.  
  22.         for anchor in anchors:
  23.             print(f"Complete:{np.sum(complete)}\r", end='')
  24.  
  25.             anchor_distances = cdist(elements[anchor].reshape(
  26.                 1, -1), elements, metric=metric)[0]
  27.  
  28.             neighborhood = np.argpartition(anchor_distances, neighborhood_size)[
  29.                 :neighborhood_size]
  30.             anchor_local = np.where(neighborhood == anchor)[0]
  31.  
  32.             local_distances = squareform(
  33.                 pdist(elements[neighborhood], metric=metric))
  34.  
  35.             anchor_to_worst = np.max(local_distances[anchor_local])
  36.  
  37.             for i, sample in enumerate(neighborhood):
  38.                 if not complete[sample]:
  39.  
  40.                     # First select the indices in the neighborhood that are knn
  41.                     best_neighbors_local = np.argpartition(
  42.                         local_distances[i], k + 1)
  43.  
  44.                     # Next find the worst neighbor among the knn observed
  45.                     best_worst_local = best_neighbors_local[np.argmax(
  46.                         local_distances[i][best_neighbors_local[:k + 1]])]
  47.                     # And store the worst distance among the local knn
  48.                     best_worst_distance = local_distances[i, best_worst_local]
  49.                     # Find the distance of the anchor to the central element
  50.                     anchor_distance = local_distances[anchor_local, i]
  51.  
  52.                     # By the triangle inequality the closest any element outside the neighborhood
  53.                     # can be to element we are examining is the criterion distance:
  54.                     criterion_distance = anchor_to_worst - anchor_distance
  55.  
  56. #                     if sample == 0:
  57. #                         print(f"ld:{local_distances[i][best_neighbors_local[:k]]}")
  58. #                         print(f"bwd:{best_worst_distance}")
  59. #                         print(f"cd:{criterion_distance}")
  60.  
  61.                     # Therefore if the criterion distance is greater than the best worst distance, the local knn
  62.                     # is also the best global knn
  63.  
  64.                     if best_worst_distance >= criterion_distance:
  65.                         continue
  66.                     else:
  67.                         # Before we conclude we must exclude the sample itself from its
  68.                         # k nearest neighbors
  69.                         best_neighbors_local = [
  70.                             bn for bn in best_neighbors_local[:k + 1] if bn != i]
  71.                         # Finally translate the local best knn to the global indices
  72.                         best_neighbors = neighborhood[best_neighbors_local]
  73.  
  74.                         nearest_neighbors[sample] = best_neighbors
  75.                         complete[sample] = True
  76.     print("\n")
  77.  
  78.     return nearest_neighbors
  79.  
  80.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement