Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.69 KB | None | 0 0
  1. def cluster_kmedians(dataset, k):    
  2.     # (1) 중심점 초기화
  3.     min_x = dataset[:,0].min()
  4.     max_x = dataset[:,0].max()
  5.     min_y = dataset[:,1].min()
  6.     max_y = dataset[:,1].max()
  7.  
  8.     center_x = np.random.uniform(low=min_x, high=max_x, size=k)
  9.     center_y = np.random.uniform(low=min_y, high=max_y, size=k)
  10.     centroids = np.stack([center_x,center_y],axis=-1)
  11.    
  12.     # (2) ~ (5) 순회
  13.     num_data = dataset.shape[0]
  14.     cluster_per_point = np.zeros((num_data)) # 각 점 별 군집
  15.  
  16.     counter = 0
  17.     while True:
  18.         prev_cluster_per_point = cluster_per_point
  19.        
  20.         # (2) 거리 계산
  21.         diff_mat = (centroids.reshape(-1,1,2) -\
  22.                     dataset.reshape(1,-1,2))
  23.         dists = np.sqrt((np.abs(diff_mat)).sum(axis=-1))
  24.         # (3) 각 데이터를 거리가 가장 가까운 군집으로 할당
  25.         cluster_per_point = dists.argmin(axis=0)
  26.        
  27.         # (4) 각 군집 별 점들의 평균을 계산 후, 군집의 중심점을 다시 계산
  28.         for i in range(k):
  29.             centroids[i] = np.median(dataset[cluster_per_point==i],
  30.                                      axis=0)
  31.  
  32.         if np.all(prev_cluster_per_point == cluster_per_point):
  33.             break
  34.  
  35.         counter += 1
  36.         plt.title("{}th Distribution of Dataset".format(counter))
  37.         for idx, color in enumerate(['r','g','b','y']):
  38.             mask = (cluster_per_point==idx)
  39.             plt.scatter(dataset[mask,0],dataset[mask,1],
  40.                         label='dataset', c=color)
  41.             plt.scatter(centroids[:,0],centroids[:,1],
  42.                         s=200, label="centroid", marker='+')
  43.         plt.show()
  44.    
  45.     return centroids
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement