Advertisement
Guest User

salad tomate oignon

a guest
Nov 14th, 2019
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.21 KB | None | 0 0
  1. from sklearn import datasets
  2. # import some data to play with
  3. import numpy as np
  4. import statistics
  5.  
  # Sample input: a 10x2 array of random integers drawn from [0, 25).
  6. data = np.random.randint(0,25,size=(10,2))
  7.  
  8. def k_means(data,k):
  9. n = data.shape[0]
  10. nvar = data.shape[1]
  11. centroids = np.random.randint(0,25,size=(k,nvar))
  12. new_centroids = np.zeros(shape=(k,nvar))
  13. diff_matrix = np.zeros(shape = (n,k))
  14. counter = 1
  15.  
  16. while np.array_equal(new_centroids,centroids) == False:
  17. if counter > 1:
  18. centroids = new_centroids
  19. for i in range(k) :
  20. distance = (data-centroids[i])**2
  21. diff_matrix[:,i] = distance.sum(axis=1)
  22.  
  23. ## Indices containing the minimum for each diff_matrix
  24. ind_min = np.argmin(diff_matrix, axis=1)
  25.  
  26. ## calculate the centroids with new data
  27. for i in range(k):
  28. new_centroids[i]= np.mean(data[ind_min == i], axis =0)
  29. #for i in range(k):
  30. # print("Step", counter, "No convergence yet")
  31. # print("Cluster",i,"contains the following data:",data[ind_min == i])
  32. counter = counter + 1
  33.  
  34.  
  35. for i in range(k):
  36. print("Cluster",i,"contains the following data:",data[ind_min == i])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement