Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Import packages
import numpy as np


def initial_centers(points, k):
    """Pick ``k`` starting centroids from evenly spaced rows of ``points``.

    Row ``len(points) * i // k`` is used for ``i = 0 .. k-1``, so the choice
    is deterministic for a given data set.

    Args:
        points: array-like of shape (n_points, n_features).
        k: number of centroids to pick.

    Returns:
        A new float array of shape (k, n_features).
    """
    points = np.asarray(points, dtype=float)
    rows = [len(points) * i // k for i in range(k)]
    # Indexing with a list makes a copy, so the data set is never aliased.
    return points[rows]


def nearest_center(points, centers):
    """Compute point-to-centroid distances and the closest centroid per point.

    Args:
        points: array-like of shape (n_points, n_features).
        centers: array-like of shape (n_centers, n_features).

    Returns:
        A tuple ``(dist, labels)`` where ``dist`` has shape
        (n_points, n_centers) and holds Euclidean distances, and ``labels``
        has shape (n_points,) and holds the index of the closest centroid
        (the lowest index wins on ties).
    """
    points = np.asarray(points, dtype=float)
    centers = np.asarray(centers, dtype=float)
    # Broadcast to (n_points, n_centers, n_features) differences.
    diff = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
    dist = np.sqrt((diff ** 2).sum(axis=2))
    return dist, dist.argmin(axis=1)


def update_centers(points, labels, centers):
    """Move each centroid to the mean of the points assigned to it.

    Args:
        points: array-like of shape (n_points, n_features).
        labels: array-like of shape (n_points,) with a centroid index per point.
        centers: array-like of shape (n_centers, n_features), current centroids.

    Returns:
        A new float array of shape (n_centers, n_features). A centroid with no
        assigned points keeps its previous position rather than being moved
        to an arbitrary location.
    """
    points = np.asarray(points, dtype=float)
    labels = np.asarray(labels)
    updated = np.array(centers, dtype=float)  # copy; the input is not mutated
    for i in range(len(updated)):
        members = points[labels == i]
        if len(members):
            updated[i] = members.mean(axis=0)
    return updated


if __name__ == "__main__":
    # Plotting and dataset packages are only needed for this demo, so they are
    # imported here; the helpers above depend on NumPy alone.
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.datasets import make_blobs

    try:
        # Equivalent of the `%matplotlib inline` notebook magic.
        get_ipython().run_line_magic("matplotlib", "inline")
    except NameError:
        pass  # Not running under IPython/Jupyter.

    sns.set()  # for plot styling

    # Generate samples
    X, y_true = make_blobs(n_samples=300, centers=4,
                           cluster_std=0.60, random_state=0)
    plt.scatter(X[:, 0], X[:, 1], s=50)

    k = 4

    # Initialize cluster centroids and plot them
    centers = initial_centers(X, k)
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=100, alpha=0.5)

    # Calculate Euclidean distance from each point to each centroid and
    # assign every point to its closest centroid
    dist, labels = nearest_center(X, centers)
    print(dist)

    # Update cluster means: each centroid becomes the average of its points
    centers = update_centers(X, labels, centers)
    plt.scatter(centers[:, 0], centers[:, 1], c='red', s=100, alpha=0.5)
    plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement