Advertisement
Venciity

KMeans vs MiniBatchKMeans

May 28th, 2018
158
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.21 KB | None | 0 0
import time

import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin
from sklearn.datasets import make_blobs

  10. # Generate sample data
  11. np.random.seed(0)
  12.  
  13. batch_size = 45
  14. centers = [[1, 1], [-1, -1], [1, -1]]
  15. n_clusters = len(centers)
  16. n_samples = 10_000
  17. X, labels_true = make_blobs(n_samples = n_samples, centers = centers, cluster_std = 0.7)
  18.  
  19. # Compute clustering with Means
  20. k_means = KMeans(init = "k-means++", n_clusters = 3, n_init = 10)
  21. time_start = time.time()
  22. k_means.fit(X)
  23. time_batch = time.time() - time_start
  24.  
  25. # Compute clustering with MiniBatchKMeans
  26. mbk = MiniBatchKMeans(init = "k-means++", n_clusters = 3, batch_size = batch_size,
  27.                       n_init = 10, max_no_improvement = 10, verbose = 0)
  28. time_start = time.time()
  29. mbk.fit(X)
  30. time_mini_batch = time.time() - time_start
  31.  
  32. # Plot result
  33. fig = plt.figure(figsize = (12, 4))
  34. fig.subplots_adjust(left = 0.02, right = 0.98, bottom = 0.05, top = 0.9)
  35. colors = ["#4EACC5", "#FF9C34", "#4E9A06"]
  36.  
  37. # We want to have the same colors for the same cluster from the MiniBatchKMeans and the KMeans algorithm.
  38. # Let"s pair the cluster centers per closest one.
  39. k_means_cluster_centers = np.sort(k_means.cluster_centers_, axis = 0)
  40. mbk_means_cluster_centers = np.sort(mbk.cluster_centers_, axis = 0)
  41. k_means_labels = pairwise_distances_argmin(X, k_means_cluster_centers)
  42. mbk_means_labels = pairwise_distances_argmin(X, mbk_means_cluster_centers)
  43. order = pairwise_distances_argmin(k_means_cluster_centers, mbk_means_cluster_centers)
  44.  
  45. # KMeans
  46. ax = fig.add_subplot(1, 3, 1)
  47. for k, color in zip(range(n_clusters), colors):
  48.     my_members = k_means_labels == k
  49.     cluster_center = k_means_cluster_centers[k]
  50.     ax.plot(X[my_members, 0], X[my_members, 1], "w", markerfacecolor = color, marker = ".")
  51.     ax.plot(cluster_center[0], cluster_center[1], "o", markerfacecolor = color, markeredgecolor = "k", markersize = 6)
  52. ax.set_title("KMeans")
  53. ax.set_xticks(())
  54. ax.set_yticks(())
  55. plt.text(-3.5, 1.8, "train time: %.2fs\ninertia: %f" % (time_batch, k_means.inertia_))
  56.  
  57. # MiniBatchKMeans
  58. ax = fig.add_subplot(1, 3, 2)
  59. for k, color in zip(range(n_clusters), colors):
  60.     my_members = mbk_means_labels == order[k]
  61.     cluster_center = mbk_means_cluster_centers[order[k]]
  62.     ax.plot(X[my_members, 0], X[my_members, 1], "w", markerfacecolor = color, marker = ".")
  63.     ax.plot(cluster_center[0], cluster_center[1], "o", markerfacecolor = color, markeredgecolor = "k", markersize = 6)
  64. ax.set_title("MiniBatchKMeans")
  65. ax.set_xticks(())
  66. ax.set_yticks(())
  67. plt.text(-3.5, 1.8, "train time: %.2fs\ninertia: %f" % (time_mini_batch, mbk.inertia_))
  68.  
  69. # Initialise the different array to all False
  70. different = (mbk_means_labels == 4)
  71. ax = fig.add_subplot(1, 3, 3)
  72. for k in range(n_clusters):
  73.     different += ((k_means_labels == k) != (mbk_means_labels == order[k]))
  74. identic = np.logical_not(different)
  75. ax.plot(X[identic, 0], X[identic, 1], "w", markerfacecolor = "#bbbbbb", marker = ".")
  76. ax.plot(X[different, 0], X[different, 1], "w", markerfacecolor = "r", marker = ".")
  77. ax.set_title("Difference")
  78. ax.set_xticks(())
  79. ax.set_yticks(())
  80.  
  81. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement