Advertisement
Guest User

Untitled

a guest
Jan 16th, 2017
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.96 KB | None | 0 0
  1. # initializes centers using kmeans++ method
  2.     def _init_plus(self):
  3.         first = self.data[np.random.choice(self.data.shape[0], 1)]
  4.         clusters = [ first ]
  5.        
  6.         # for each point, find distance squared from the cluster, then normalize into weights
  7.         weights = np.apply_along_axis(lambda row: np.linalg.norm(row - first)**2 , 1, self.data)
  8.         weights /= sum(weights)
  9.        
  10.         while len(clusters) < self.n_clusters:
  11.             new_cluster = self.data[np.random.choice(self.data.shape[0], 1, p = weights)]
  12.             clusters.append(new_cluster)
  13.             #update weights with distances -- looking for distance to closest cluster, so just take min
  14.             for (i, row) in enumerate(self.data):
  15.                 old_dist = weights[i]
  16.                 new_dist = np.linalg.norm(row - new_cluster)**2
  17.                 weights[i] = min(old_dist, new_dist)
  18.             weights /= sum(weights)
  19.        
  20.         return clusters
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement