Advertisement
kantegory

KMeans algorithm

Apr 25th, 2018
161
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.26 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from copy import deepcopy
  5.  
  6. data = pd.read_csv('iris.csv')
  7. data.head()
  8.  
  9.  
  10. def distance(v1, v2, ax=1):
  11. return np.linalg.norm(v1 - v2, axis=ax)
  12.  
  13.  
  14. class KMeans:
  15. def __init__(self, n_clusters, max_iter=300):
  16. self.n_clusters = n_clusters
  17.  
  18. def fit(self, X):
  19. n_samples = len(X)
  20. centroids = X[np.random.choice(X.shape[0], self.n_clusters, replace=False)]
  21. centroids_old = np.zeros(centroids.shape)
  22. clusters = np.zeros(n_samples)
  23.  
  24. while True:
  25. for i in range(n_samples):
  26. distances = distance(X[i,], centroids)
  27. clusters[i] = distances.argmin()
  28.  
  29. centroids_old = deepcopy(centroids)
  30. for k in range(self.n_clusters):
  31. centroids[k] = X[clusters == k,].mean(axis=0)
  32.  
  33. error = distance(centroids, centroids_old, None)
  34. if error == 0:
  35. break
  36.  
  37. self.clusters = clusters.astype('int')
  38. self.centroids = centroids
  39.  
  40. def predict(self, y):
  41. # PUT YOUR COD HERE
  42. pass
  43.  
  44.  
  45. model = KMeans(3)
  46. X = (data.loc[:, data.columns != 'Name']).as_matrix()
  47. print(X)
  48. model.fit(X)
  49. print(model.clusters)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement