Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn import cluster, metrics
- import numpy as np
- from matplotlib import pyplot, mlab
def kmeans(cluster_input, n_clusters=2):
    """Cluster the samples with k-means, plot the result and print its silhouette score.

    Args:
        cluster_input: Samples to cluster; passed unchanged to ``KMeans.fit``
            and to ``sil_score``.
        n_clusters: Number of clusters to fit. Defaults to 2, the value that
            used to be hard-coded here.

    Returns:
        None. The results are shown as a plot and printed to stdout.
    """
    k_means = cluster.KMeans(n_clusters=n_clusters)
    k_means.fit(cluster_input)
    # NOTE: the data is not passed to the plotting helper; when called with
    # two arguments it plots the module-level ``cluster_input`` array.
    plot_n_clusters(n_clusters, k_means)
    sil_score(cluster_input, k_means.labels_)
def shiftmeans(cluster_input):
    """Cluster the samples with mean-shift, plot the result and print its silhouette score.

    The kernel bandwidth is estimated from the data itself
    (``estimate_bandwidth`` with ``quantile=0.5`` and ``n_samples=None``), and
    the number of clusters is whatever the fitted model ends up with.

    Args:
        cluster_input: Samples to cluster; passed unchanged to the bandwidth
            estimator, to ``MeanShift.fit`` and to ``sil_score``.

    Returns:
        None. The results are shown as a plot and printed to stdout.
    """
    estimated_bw = cluster.estimate_bandwidth(
        cluster_input, quantile=0.5, n_samples=None
    )
    mean_shift = cluster.MeanShift(bandwidth=estimated_bw, bin_seeding=True)
    mean_shift.fit(cluster_input)

    # The cluster count is the number of distinct labels the model assigned.
    cluster_count = len(np.unique(mean_shift.labels_))
    print("Number of estimated clusters : ", str(cluster_count))

    plot_n_clusters(cluster_count, mean_shift)
    sil_score(cluster_input, mean_shift.labels_)
def plot_n_clusters(n_clusters, cluster_obj, data=None):
    """Scatter-plot the first two columns of the data, coloured by cluster.

    For every cluster index ``k`` in ``range(n_clusters)`` the member points
    (rows where ``cluster_obj.labels_ == k``) are drawn as dots and the
    matching entry of ``cluster_obj.cluster_centers_`` as a larger circle in
    the same colour. The figure is then displayed with ``pyplot.show()``.

    Args:
        n_clusters: Number of clusters to draw.
        cluster_obj: Fitted clustering object exposing ``labels_`` and
            ``cluster_centers_``.
        data: Array of the clustered samples, indexed as ``data[mask, column]``.
            When omitted, the module-level ``cluster_input`` array is used so
            that existing two-argument calls keep working.
    """
    if data is None:
        # Backward compatibility: earlier versions always read this global.
        data = cluster_input

    all_colors = ['red', 'blue', 'green', 'purple', 'magenta', 'yellow',
                  'cyan', 'orange', 'black', 'gray', 'brown']
    pyplot.figure()
    for k in range(n_clusters):
        # Cycle through the palette so clusters beyond the 11th are still
        # drawn instead of being silently skipped.
        col = all_colors[k % len(all_colors)]
        my_members = cluster_obj.labels_ == k
        cluster_center = cluster_obj.cluster_centers_[k]
        pyplot.plot(data[my_members, 0], data[my_members, 1], 'w',
                    markerfacecolor=col, marker='.')
        pyplot.plot(cluster_center[0], cluster_center[1], 'o',
                    markerfacecolor=col, markeredgecolor='k', markersize=6)

    # Use the bare class name (e.g. "KMeans") as the title; slicing the type
    # repr produced a one-element list such as ["KMeans'>"].
    pyplot.title(type(cluster_obj).__name__)
    pyplot.xlabel('var1')
    pyplot.ylabel('var2')
    pyplot.show()
def sil_score(cluster_input, labels):
    """Print and return the silhouette score of a clustering.

    Args:
        cluster_input: The samples that were clustered.
        labels: Cluster label assigned to each sample.

    Returns:
        The value returned by ``metrics.silhouette_score`` (called with
        ``sample_size=1000``), or NaN when fewer than two distinct labels are
        present and the score is therefore undefined.
    """
    # silhouette_score raises ValueError unless there are at least two
    # distinct labels; mean-shift can legitimately produce a single cluster,
    # so report that case instead of crashing.
    if np.unique(labels).size < 2:
        print('Silhouette score: undefined (fewer than 2 clusters)')
        return float('nan')

    sil = metrics.silhouette_score(cluster_input, labels, sample_size=1000)
    print('Silhouette score: ', str(sil))
    return sil
# Path to a CSV file whose first row holds the column names.
filename = "your_file_name_with_path"

# matplotlib.mlab.csv2rec was removed in Matplotlib 3.1; np.genfromtxt with
# names=True likewise yields an array addressable by column name. Unlike
# csv2rec, column names are matched case-sensitively here.
dat = np.genfromtxt(filename, delimiter=",", names=True)

# Build an (n_samples, 2) array from the two chosen columns
# (columns must be the same length). The name ``cluster_input`` must stay a
# module-level global because the plotting helper reads it.
cluster_input = np.column_stack([dat['col_name1'], dat['col_name2']])

kmeans(cluster_input)
shiftmeans(cluster_input)
Add Comment
Please, Sign In to add comment