Guest User

Untitled

a guest
Apr 22nd, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.92 KB | None | 0 0
  1. from sklearn import cluster, metrics
  2. import numpy as np
  3. from matplotlib import pyplot, mlab
  4.  
  5. def kmeans(cluster_input):
  6.  
  7. n_clusters = 2
  8. k_means = cluster.KMeans(n_clusters=n_clusters)
  9. k_means.fit(cluster_input)
  10.  
  11. plot_n_clusters(n_clusters,k_means)
  12. sil_score(cluster_input,k_means.labels_)
  13.  
  14. def shiftmeans(cluster_input):
  15.  
  16. bandwidth = cluster.estimate_bandwidth(cluster_input, quantile=0.5, n_samples=None)
  17.  
  18. ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
  19. ms.fit(cluster_input)
  20. labels = ms.labels_
  21. labels_unique = np.unique(labels)
  22. n_clusters = len(labels_unique)
  23. print("Number of estimated clusters : ", str(n_clusters))
  24.  
  25. plot_n_clusters(n_clusters,ms)
  26. sil_score(cluster_input,ms.labels_)
  27.  
  28. def plot_n_clusters(n_clusters,cluster_obj):
  29.  
  30. all_colors = ['red', 'blue','green','purple', 'magenta', 'yellow', 'cyan', 'orange', 'black', 'gray', 'brown']
  31.  
  32. pyplot.figure()
  33. for k, col in zip(range(n_clusters), all_colors[:n_clusters]):
  34. my_members = cluster_obj.labels_== k
  35. cluster_center = cluster_obj.cluster_centers_[k]
  36. pyplot.plot(cluster_input[my_members, 0], cluster_input[my_members, 1], 'w',
  37. markerfacecolor=col, marker='.')
  38. pyplot.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
  39. markeredgecolor='k', markersize=6)
  40.  
  41. description = str(type(cluster_obj)).split('.')
  42. pyplot.title(description[-1:])
  43.  
  44. pyplot.xlabel('var1')
  45. pyplot.ylabel('var2')
  46. pyplot.show()
  47.  
  48. def sil_score(cluster_input,labels):
  49.  
  50. sil = metrics.silhouette_score(cluster_input, labels, sample_size=1000)
  51. print('Silhouette score: ', str(sil))
  52. return sil
  53.  
  54. filename = "your_file_name_with_path"
  55. dat = mlab.csv2rec(filename)
  56. cluster_input = np.vstack([dat['col_name1'],dat['col_name2']]).transpose() # columns must be the same length
  57.  
  58. kmeans(cluster_input)
  59. shiftmeans(cluster_input)
Add Comment
Please, Sign In to add comment