Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn import datasets
- # import some data to play with
- import numpy as np
- import statistics
# Sample input for k_means: 10 random 2-D integer points, each coordinate
# drawn uniformly from [0, 25) (the upper bound is exclusive).
data = np.random.randint(low=0, high=25, size=(10, 2))
def k_means(data, k, max_iter=300):
    """Cluster the rows of ``data`` into ``k`` groups with Lloyd's k-means.

    Each pass assigns every sample to its nearest centroid (squared
    Euclidean distance) and then moves each centroid to the mean of the
    samples assigned to it.  Iteration stops when a pass leaves the
    centroids unchanged, or after ``max_iter`` passes.

    Args:
        data: 2-D array-like of shape (n_samples, n_features).
        k: Number of clusters; must satisfy ``1 <= k <= n_samples``.
        max_iter: Upper bound on the number of assignment/update passes.

    Returns:
        A ``(labels, centroids)`` tuple.  ``labels`` is an integer array of
        length n_samples holding the cluster index of each row, and
        ``centroids`` is a float array of shape (k, n_features).

    Raises:
        ValueError: If ``data`` is not 2-D, ``k`` is outside
            ``[1, n_samples]``, or ``max_iter`` is smaller than 1.

    Side effects:
        Prints the members of every cluster, one line per cluster.
    """
    points = np.asarray(data)
    if points.ndim != 2:
        raise ValueError(
            "data must be a 2-D array of shape (n_samples, n_features)"
        )
    n = points.shape[0]
    if not 1 <= k <= n:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n}), got {k}"
        )
    if max_iter < 1:
        raise ValueError(f"max_iter must be at least 1, got {max_iter}")

    # Work on a float copy so centroid means are not truncated, while the
    # untouched ``points`` array is kept for printing in its original dtype.
    values = points.astype(float)

    # Seed the centroids with k distinct samples so they always lie inside
    # the data, instead of in a hard-coded coordinate range.  Fancy indexing
    # returns a copy, so later updates never touch ``values``.
    centroids = values[np.random.choice(n, size=k, replace=False)]

    for _ in range(max_iter):
        # Squared distance from every sample to every centroid: shape (n, k).
        deltas = values[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        labels = np.argmin((deltas ** 2).sum(axis=2), axis=1)

        # Build the update in a fresh array.  Reusing the same array for old
        # and new centroids would make the convergence test compare an array
        # with itself and stop the loop prematurely.
        new_centroids = centroids.copy()
        for i in range(k):
            members = values[labels == i]
            # An empty cluster keeps its previous centroid; averaging an
            # empty selection would yield NaN and prevent convergence.
            if len(members):
                new_centroids[i] = members.mean(axis=0)

        if np.array_equal(new_centroids, centroids):
            break
        centroids = new_centroids

    for i in range(k):
        print("Cluster", i, "contains the following data:", points[labels == i])

    return labels, centroids
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement