Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def cluster_kmedians(dataset, k, *, show_plots=True):
    """Cluster 2-D points into ``k`` groups with the k-medians algorithm.

    Centroids start at uniformly random positions inside the bounding box
    of the data.  The function then alternates between assigning every
    point to its nearest centroid (Manhattan / L1 distance) and moving
    each centroid to the coordinate-wise median of its members, until the
    assignment stops changing.

    Args:
        dataset: NumPy array indexed as ``dataset[:, 0]`` (x) and
            ``dataset[:, 1]`` (y); it is reshaped to ``(1, -1, 2)``, so it
            must hold exactly two values per point.
        k: Number of clusters; must be at least 1.
        show_plots: When true (the default), draw the clusters and the
            centroids after every iteration that changed the assignment,
            using the module-level ``plt`` (assumed to be
            ``matplotlib.pyplot`` imported elsewhere in the file).  Pass
            ``False`` to run without any plotting.

    Returns:
        Array of shape ``(k, 2)`` with the final centroid coordinates.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    # (1) Initialise the centroids inside the bounding box of the data.
    min_x = dataset[:, 0].min()
    max_x = dataset[:, 0].max()
    min_y = dataset[:, 1].min()
    max_y = dataset[:, 1].max()
    center_x = np.random.uniform(low=min_x, high=max_x, size=k)
    center_y = np.random.uniform(low=min_y, high=max_y, size=k)
    centroids = np.stack([center_x, center_y], axis=-1)

    # (2) ~ (5) Iterate until the assignment is stable.
    num_data = dataset.shape[0]
    # Cluster index of every point.  -1 is not a valid index, so the very
    # first assignment can never be mistaken for "nothing changed".
    cluster_per_point = np.full(num_data, -1)
    colors = ['r', 'g', 'b', 'y']
    counter = 0
    while True:
        prev_cluster_per_point = cluster_per_point

        # (2) Manhattan distance from every centroid to every point;
        #     the result has shape (k, num_data).
        diff_mat = centroids.reshape(-1, 1, 2) - dataset.reshape(1, -1, 2)
        dists = np.abs(diff_mat).sum(axis=-1)

        # (3) Assign each point to the cluster with the nearest centroid.
        cluster_per_point = dists.argmin(axis=0)

        # (4) Move each centroid to the median of its members.  A cluster
        #     without members keeps its previous centroid: the median of an
        #     empty selection is NaN, and a NaN centroid would corrupt every
        #     later distance computation.
        for i in range(k):
            members = dataset[cluster_per_point == i]
            if len(members) > 0:
                centroids[i] = np.median(members, axis=0)

        # (5) Stop once no point changed cluster.
        if np.array_equal(prev_cluster_per_point, cluster_per_point):
            break

        counter += 1
        if show_plots:
            plt.title("{}th Distribution of Dataset".format(counter))
            # Draw every cluster, cycling the palette when k exceeds it.
            for idx in range(k):
                mask = (cluster_per_point == idx)
                plt.scatter(dataset[mask, 0], dataset[mask, 1],
                            label='dataset', c=colors[idx % len(colors)])
            plt.scatter(centroids[:, 0], centroids[:, 1],
                        s=200, label="centroid", marker='+')
            plt.show()
    return centroids
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement