Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Solution
def K_means_clustering(X, n_clusters=2, seed=1, num_iterations=10):
    """Partition the rows of ``X`` into ``n_clusters`` groups, k-means style.

    Starting from ``n_clusters`` randomly chosen rows of ``X`` as centroids,
    the function repeatedly (1) assigns every sample to the centroid with the
    smallest value returned by ``calculate_distance`` and (2) moves each
    centroid to the mean of the samples assigned to it. It stops as soon as
    the centroids no longer change or after ``num_iterations`` rounds.

    Args:
        X: 2-D array-like with one sample per row and one feature per column.
        n_clusters: Number of clusters; must be between 1 and the number of
            samples (inclusive).
        seed: Seed for ``np.random.RandomState`` used to pick the initial
            centroids, making the run reproducible.
        num_iterations: Maximum number of assignment/update rounds.

    Returns:
        A ``(centroids, labels)`` tuple. ``centroids`` is an ndarray with one
        row per cluster. ``labels`` is a list holding, for each sample, the
        index of its cluster in ``centroids`` (an empty list when
        ``num_iterations`` is 0).

    Raises:
        ValueError: If ``n_clusters`` is smaller than 1 or larger than the
            number of samples.
    """
    X = np.asarray(X)
    n_samples = X.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 1 and the number of samples "
            f"({n_samples}), got {n_clusters}"
        )

    # Initialize the centroids with a random selection of n_clusters samples.
    rng = np.random.RandomState(seed)
    initial_rows = rng.permutation(n_samples)[:n_clusters]
    centroids = X[initial_rows]
    labels = []

    # Repeat until convergence or until num_iterations rounds have run.
    for num in range(num_iterations):
        # distances[k, j] is the distance from sample j to centroid k.
        # NOTE(review): calculate_distance is defined elsewhere in this file;
        # this assumes it returns one distance per row of X -- confirm.
        distances = np.asarray(
            [calculate_distance(X, centroid) for centroid in centroids]
        )
        # Each label is the index of the closest centroid (first one on ties).
        nearest = distances.argmin(axis=0)
        labels = nearest.tolist()

        # Start from a float copy of the current centroids so that a cluster
        # left without samples keeps its position instead of becoming 0/0.
        new_centroids = np.array(centroids, dtype=float)
        for cluster in range(n_clusters):
            members = X[nearest == cluster]
            if len(members):
                new_centroids[cluster] = members.mean(axis=0)

        # Converged: this round did not move any centroid.
        if np.array_equal(centroids, new_centroids):
            print('Convergence achieved with:', num + 1, 'iterations')
            break
        if (num + 1) % 10 == 0:
            print('No convergence yet after', num + 1, 'iterations')
        centroids = new_centroids

    return np.asarray(centroids), labels
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement