Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Compare KMeans and MiniBatchKMeans on the same synthetic 2-D blobs.
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans, MiniBatchKMeans
# make_blobs is imported from the public sklearn.datasets package; the private
# sklearn.datasets.samples_generator module is not available in current
# scikit-learn releases.
from sklearn.datasets import make_blobs
from sklearn.metrics.pairwise import pairwise_distances_argmin

# Generate sample data.
# make_blobs is called without random_state, so seeding NumPy's global RNG
# here is what makes the generated points reproducible between runs.
np.random.seed(0)
batch_size = 45  # mini-batch size handed to MiniBatchKMeans
centers = [[1, 1], [-1, -1], [1, -1]]  # true blob centers, one row per blob
n_clusters = len(centers)
n_samples = 10_000
X, labels_true = make_blobs(n_samples=n_samples, centers=centers, cluster_std=0.7)
# Compute clustering with KMeans.
# The cluster count comes from n_clusters (derived from `centers`) instead of
# a repeated literal, so the models always match the loops that plot them.
k_means = KMeans(init="k-means++", n_clusters=n_clusters, n_init=10)
time_start = time.time()
k_means.fit(X)
time_batch = time.time() - time_start  # seconds spent in the full-batch fit

# Compute clustering with MiniBatchKMeans.
mbk = MiniBatchKMeans(
    init="k-means++",
    n_clusters=n_clusters,
    batch_size=batch_size,
    n_init=10,
    max_no_improvement=10,
    verbose=0,
)
time_start = time.time()
mbk.fit(X)
time_mini_batch = time.time() - time_start  # seconds spent in the mini-batch fit
# Plot result.
fig = plt.figure(figsize=(12, 4))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ["#4EACC5", "#FF9C34", "#4E9A06"]

# We want the same color for the same cluster in the KMeans and the
# MiniBatchKMeans panels, so pair each KMeans center with its closest
# MiniBatchKMeans center.
#
# The fitted centers are used exactly as returned. Sorting them with axis=0
# would order every coordinate column independently, which can combine the x
# of one centroid with the y of another and produce points that are not
# centroids of the fitted model.
k_means_cluster_centers = k_means.cluster_centers_
mbk_means_cluster_centers = mbk.cluster_centers_

# Label every sample with the index of its nearest center, per model.
k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)
mbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers)

# order[k] is the index of the MiniBatchKMeans center nearest to KMeans
# center k; later code uses it to translate between the two labelings.
order = pairwise_distances_argmin(
    k_means_cluster_centers, mbk_means_cluster_centers
)
# Left panel: samples and centers as clustered by KMeans.
ax = fig.add_subplot(1, 3, 1)
for idx, face_color in enumerate(colors[:n_clusters]):
    in_cluster = k_means_labels == idx
    center = k_means_cluster_centers[idx]
    # Members of this cluster, drawn with "." markers in the cluster color.
    ax.plot(
        X[in_cluster, 0],
        X[in_cluster, 1],
        "w",
        markerfacecolor=face_color,
        marker=".",
    )
    # The cluster center: a larger circle in the same color, outlined in black.
    ax.plot(
        center[0],
        center[1],
        "o",
        markerfacecolor=face_color,
        markeredgecolor="k",
        markersize=6,
    )
ax.set_title("KMeans")
# Hide tick marks on both axes.
ax.set_xticks(())
ax.set_yticks(())
caption = "train time: %.2fs\ninertia: %f" % (time_batch, k_means.inertia_)
plt.text(-3.5, 1.8, caption)
# Middle panel: samples and centers as clustered by MiniBatchKMeans.
ax = fig.add_subplot(1, 3, 2)
for idx, face_color in enumerate(colors[:n_clusters]):
    # Translate the KMeans cluster index into its paired MiniBatchKMeans
    # index so both panels use one color per matched cluster.
    paired = order[idx]
    in_cluster = mbk_means_labels == paired
    center = mbk_means_cluster_centers[paired]
    # Members of this cluster, drawn with "." markers in the cluster color.
    ax.plot(
        X[in_cluster, 0],
        X[in_cluster, 1],
        "w",
        markerfacecolor=face_color,
        marker=".",
    )
    # The cluster center: a larger circle in the same color, outlined in black.
    ax.plot(
        center[0],
        center[1],
        "o",
        markerfacecolor=face_color,
        markeredgecolor="k",
        markersize=6,
    )
ax.set_title("MiniBatchKMeans")
# Hide tick marks on both axes.
ax.set_xticks(())
ax.set_yticks(())
caption = "train time: %.2fs\ninertia: %f" % (time_mini_batch, mbk.inertia_)
plt.text(-3.5, 1.8, caption)
# Right panel: highlight the samples that the two models assign to different
# (paired) clusters.
#
# Start from an explicit all-False mask, one entry per sample. Building it by
# comparing the labels against a sentinel value only gives all-False while
# that value is not a real cluster index, which stops holding once there are
# enough clusters.
different = np.zeros(mbk_means_labels.shape, dtype=bool)
ax = fig.add_subplot(1, 3, 3)
for k in range(n_clusters):
    # For pair k, a sample disagrees when exactly one of the two models puts
    # it in that cluster; accumulate those samples with a logical OR.
    different |= (k_means_labels == k) != (mbk_means_labels == order[k])
identic = np.logical_not(different)
# Agreeing samples in grey, disagreeing samples in red.
ax.plot(X[identic, 0], X[identic, 1], "w", markerfacecolor="#bbbbbb", marker=".")
ax.plot(X[different, 0], X[different, 1], "w", markerfacecolor="r", marker=".")
ax.set_title("Difference")
ax.set_xticks(())
ax.set_yticks(())
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement