Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def kmeans(data, k, c=None):
    """Run k-means clustering on ``data`` and report the data set size.

    Centroids are refined repeatedly until ``has_converged`` reports that
    iteration should stop.

    Args:
        data: Collection of data points. It is passed through to
            ``randomize_centroids`` and ``euclidean_dist``; this function
            itself only takes ``len(data)``.
        k: Number of clusters to form.
        c: Optional initial centroids. When given, this object is used
            directly and its entries are overwritten in place as the
            centroids are refined. When ``None``, the starting centroids
            come from ``randomize_centroids``.

    Returns:
        None. The function prints the number of data instances once the
        loop finishes.
    """
    if c is not None:
        # NOTE: the caller's sequence is updated in place below.
        centroids = c
    else:
        centroids = randomize_centroids(data, [], k)

    old_centroids = [[] for _ in range(k)]
    iterations = 0

    while not has_converged(centroids, old_centroids, iterations):
        iterations += 1

        # assign data points to clusters
        clusters = euclidean_dist(data, centroids, [[] for _ in range(k)])

        # recalculate centroids
        for index, cluster in enumerate(clusters):
            old_centroids[index] = centroids[index]
            # An empty cluster has no mean: np.mean([]) yields NaN, which
            # would replace the centroid with a bare float. Keep the previous
            # centroid instead so later distance computations stay valid.
            if len(cluster) > 0:
                centroids[index] = np.mean(cluster, axis=0).tolist()

    print("The total number of data instances is: " + str(len(data)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement