Advertisement
Guest User

Untitled

a guest
Oct 19th, 2019
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.88 KB | None | 0 0
  1. # Import packages
  2. %matplotlib inline
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns; sns.set()  # for plot styling
  5. import numpy as np
  6.  
  # k-means idea: average the x and y of the points closest to a centroid;
# that average becomes the new cluster point.

# Generate samples.
# make_blobs is imported from the public ``sklearn.datasets`` namespace: the
# ``sklearn.datasets.samples_generator`` module was a private path that newer
# scikit-learn releases no longer ship, while this path works on old and new.
from sklearn.datasets import make_blobs

X, y_true = make_blobs(n_samples=300, centers=4,
                       cluster_std=0.60, random_state=0)
plt.scatter(X[:, 0], X[:, 1], s=50)

# Coordinate columns of the sample matrix, used by the steps below.
x = X[:, 0]
y = X[:, 1]
  19.  
  20. ###############################################
  21. # YOUR CODE GOES HERE
  22. # Put some code to find clusters here
  23. # Assign the clusters and labels in your code
  24. ###############################################
  25. #print(range(len(X)))
  26.  
  27. # Uncomment to display clusters and cluster centers
  28. #plt.scatter(X[:, 0], X[:, 1], c=labels,
  29. #            s=50, cmap='viridis');
  30. #plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5);
  31.  
  # Number of clusters and the bookkeeping arrays used by the update step.
k = 4

# Per-cluster statistics for each coordinate, one column per cluster:
#   row 0 = running sum, row 1 = member count, row 2 = mean.
# k_mean_x_curr has to exist before the update step touches it (it used to be
# commented out, which raised NameError there), and three rows are allocated
# explicitly so the layout does not depend on k being at least 3.
k_mean_x_curr = np.zeros((3, k))
k_mean_y_curr = np.zeros((3, k))

# Current / previous centroid estimates. Neither is read by the visible code.
# NOTE(review): k_mean_prev is (2, k) while k_mean_curr is (k, 2) -- confirm
# the intended orientation before comparing them for convergence.
k_mean_curr = np.zeros((k, 2))
k_mean_prev = np.zeros((2, k))
  40.  
  41.  
  # Seed the k centroids with samples taken at evenly spaced row positions.
seed_rows = [int(len(X) / k * step) for step in range(k)]

# One row per centroid: column 0 is the x coordinate, column 1 is y.
centers = np.zeros((k, 2))
centers[:, 0] = x[seed_rows]
centers[:, 1] = y[seed_rows]

# Show where the centroids start out (grey, semi-transparent markers).
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=100, alpha=0.5)
  53.  
  # Assignment step: Euclidean distance from every sample to every centroid.
#
# Column layout of ``dist`` (one row per sample):
#   0 .. k-1 : distance to centroid i
#   k        : distance to the nearest centroid
#   k + 1    : index of the nearest centroid (the sample's cluster label)
#
# The label goes in column k + 1 because that is the column the update step
# reads; writing it to a hard-coded column 4 left column k + 1 at zero, so
# every sample ended up in cluster 0. Only the k distance columns are searched
# for the minimum, so the bookkeeping columns can never be picked as a label,
# and nothing here assumes k == 4.
dist = np.zeros((len(X), k + 2))

# Broadcasting a (samples, 1) column against the (k,) centroid coordinates
# gives the full (samples, k) distance table in one expression.
dist[:, :k] = np.sqrt(
    (x[:, np.newaxis] - centers[:, 0]) ** 2
    + (y[:, np.newaxis] - centers[:, 1]) ** 2
)
dist[:, k] = dist[:, :k].min(axis=1)
dist[:, k + 1] = dist[:, :k].argmin(axis=1)  # ties -> lowest centroid index

print(dist)
  # Update step: move every centroid to the mean of the samples assigned to it.

# Label each sample with its nearest centroid, taken straight from the k
# distance columns of ``dist`` (ties resolve to the lowest centroid index).
labels = np.argmin(dist[:, :k], axis=1)

# Per-cluster statistics, one column per cluster:
#   row 0 = coordinate sum, row 1 = member count, row 2 = mean.
# Built fresh on every pass so re-running this step never adds to stale sums.
k_mean_x_curr = np.zeros((3, k))
k_mean_y_curr = np.zeros((3, k))

# Update cluster sums and counts. The y sum is accumulated from the y values
# only (it previously added the running x sum by mistake).
k_mean_x_curr[0] = np.bincount(labels, weights=x, minlength=k)
k_mean_y_curr[0] = np.bincount(labels, weights=y, minlength=k)
k_mean_x_curr[1] = k_mean_y_curr[1] = np.bincount(labels, minlength=k)

# Calculate mean. A cluster with no members keeps its current centroid rather
# than dividing by zero or being moved to the origin.
occupied = k_mean_x_curr[1] > 0
k_mean_x_curr[2] = centers[:, 0]
k_mean_y_curr[2] = centers[:, 1]
k_mean_x_curr[2, occupied] = (
    k_mean_x_curr[0, occupied] / k_mean_x_curr[1, occupied]
)
k_mean_y_curr[2, occupied] = (
    k_mean_y_curr[0, occupied] / k_mean_y_curr[1, occupied]
)

# Update centers array with new means.
centers[:, 0] = k_mean_x_curr[2]
centers[:, 1] = k_mean_y_curr[2]

# Show the moved centroids in red on top of the earlier scatter plots.
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=100, alpha=0.5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement