Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # initializes centers using kmeans++ method
def _init_plus(self):
    """Pick ``self.n_clusters`` initial centers from ``self.data`` (k-means++).

    The first center is drawn uniformly at random from the rows of
    ``self.data``.  Every further center is drawn with probability
    proportional to the squared Euclidean distance between a row and the
    nearest center chosen so far.

    Reads ``self.data`` (indexed by row, with ``shape[0]`` rows) and
    ``self.n_clusters``; draws from NumPy's global random state.

    Returns:
        A list of arrays, one per center.  Each entry is the result of
        indexing ``self.data`` with a length-1 index array, so for 2-D
        data it has shape ``(1, n_features)``.  At least one center is
        always returned, even when ``self.n_clusters`` is below 1.
    """
    n_points = self.data.shape[0]

    first = self.data[np.random.choice(n_points, 1)]
    clusters = [first]

    # Squared distance from every row to its nearest chosen center.  This
    # array is deliberately kept UNnormalized: probabilities are derived
    # from it for each draw, so a later min() never compares a probability
    # with a raw distance.
    closest_sq = np.linalg.norm(self.data - first, axis=1) ** 2

    while len(clusters) < self.n_clusters:
        total = closest_sq.sum()
        if total > 0:
            weights = closest_sq / total
        else:
            # Every row coincides with an already chosen center, so the
            # distance weighting is undefined (0/0); draw uniformly.
            weights = None

        new_cluster = self.data[np.random.choice(n_points, 1, p=weights)]
        clusters.append(new_cluster)

        # Only the newest center can shorten a row's nearest distance.
        new_sq = np.linalg.norm(self.data - new_cluster, axis=1) ** 2
        np.minimum(closest_sq, new_sq, out=closest_sq)

    return clusters
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement