from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt
import random
import math


def plot_k_means(x, r, k, centers):
    # Scatter the data points and mark the final cluster centers in red.
    #random_colors = np.random.random((k, 3))
    #colors = r.dot(random_colors)
    #colors = ('black')
    print("Probabilities = ", r[:40])
    for center in centers:
        print(center)
        plt.plot(center[0], center[1], "ro")
    plt.scatter(x[:, 0], x[:, 1])
    plt.show()
def initialize_centers(x, num_k):
    # Pick num_k distinct data points at random as the initial centers.
    N, D = x.shape
    centers = np.zeros((num_k, D))
    used_idx = []
    for k in range(num_k):
        idx = np.random.choice(N)
        while idx in used_idx:
            idx = np.random.choice(N)
        used_idx.append(idx)
        centers[k] = x[idx]
    return centers
def update_centers(x, r, K):
    # Each new center is the responsibility-weighted mean of all points.
    N, D = x.shape
    centers = np.zeros((K, D))
    for k in range(K):
        centers[k] = r[:, k].dot(x) / r[:, k].sum()
    return centers
def square_dist(a, b):
    # Element-wise squared difference (unused helper).
    return (a - b) ** 2


def distance(p0, p1):
    # Euclidean distance between two 2-D points.
    return math.sqrt((p0[0] - p1[0])**2 + (p0[1] - p1[1])**2)
def cost_func(x, r, centers, K):
    # Responsibility-weighted sum of the distances from every point to every
    # center (axis=1 gives one distance per point rather than a matrix norm).
    cost = 0
    for k in range(K):
        norm = np.linalg.norm(x - centers[k], 2, axis=1)
        cost += (norm * r[:, k]).sum()
    return cost
def cluster_responsibilities(centers, x, beta):
    # Soft-assign every point to every center.
    N, _ = x.shape
    K, D = centers.shape
    R = np.zeros((N, K))
    for n in range(N):
        R[n] = np.exp(-beta * np.linalg.norm(centers - x[n], 2, axis=1))
    R /= R.sum(axis=1, keepdims=True)

    return R
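
# The soft assignment computed in cluster_responsibilities is
#   R[n, k] = exp(-beta * ||x[n] - centers[k]||) / sum_j exp(-beta * ||x[n] - centers[j]||)
# so every point gets a normalized weight for every center, and a larger beta
# pushes each row of R toward a hard 0/1 k-means assignment.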


def soft_k_means(x, K, beta=1.):
    # Alternate soft assignment (cluster_responsibilities) and center updates
    # (update_centers). If the cost keeps changing, double K by splitting every
    # center; on later passes, first merge centers that have drifted closer
    # than distance_threshold. Stop once the cost change falls below cost_threshold.
    centers = initialize_centers(x, K)
    merge = False
    print("centers after initialize: ", centers)
    prev_cost = 0
    cost = 1
    distance_threshold = (1.0 / K)
    cost_threshold = 1e-2
    while True:
    #for _ in range(max_iters):
        r = cluster_responsibilities(centers, x, beta)
        centers = update_centers(x, r, K)
        idx = 0
        if merge:
            # Merge pass: replace one of two close centers with their midpoint
            # and delete the other, then recompute assignments with the new K.
            while idx < len(centers):
                j = 0
                while j < len(centers):
                    dist = distance(centers[idx], centers[j])
                    print('Calculating Distance Between: ' + str(centers[idx]) + ' & ' + str(centers[j]))
                    print('Distance: ' + str(dist))
                    if dist <= distance_threshold and idx != j:
                        print('Removing!!')
                        centers[idx] = [(centers[idx][0] + centers[j][0]) / 2, (centers[idx][1] + centers[j][1]) / 2]
                        centers = np.delete(centers, j, 0)
                        idx = -1
                        break
                    else:
                        j += 1
                idx += 1
            K = len(centers)
            r = cluster_responsibilities(centers, x, beta)
            centers = update_centers(x, r, K)

        print("Centers after update are: ", centers)
        cost = cost_func(x, r, centers, K)
        print("distance the centers moved= ", np.abs(cost - prev_cost))
        if np.abs(cost - prev_cost) < cost_threshold:
            print('Breaking!! Final K:' + str(K))
            break
        prev_cost = cost
        # Split pass: double K and perturb one copy of each center slightly.
        K *= 2
        split_centers = np.zeros((K, x.shape[1]))
        print("new centers: ", split_centers)
        for i, center in enumerate(centers):
            split_centers[i * 2] = center
            split_centers[(i * 2) + 1] = [center[0] + (random.randint(0, 100) * 0.01), center[1] + (random.randint(0, 100) * 0.01)]
            print("new center = ", split_centers)
        centers = split_centers
        merge = True
    print("Final centers before plot= ", centers)
    plot_k_means(x, r, K, centers)


def generate_samples(std=.5, dim=2, dist=4):
    # Toy 2-D data: four Gaussian blobs (the std/dim/dist arguments are only
    # used by the commented-out manual version below).
    x, _ = make_blobs(n_samples=100, centers=4, cluster_std=.1, random_state=12)
    #mu0 = np.array([0,0])
    #mu1 = np.array([dist, dist])
    #mu2 = np.array([0, dist])
    # num samps per class
    #Nc = 10
    #x0 = np.random.randn(Nc, dim) * std + mu0
    #x1 = np.random.randn(Nc, dim) * std + mu1
    #x2 = np.random.randn(Nc, dim) * std + mu2
    #x = np.concatenate((x0, x1, x2), axis=0)
    return x


def main():
    x = generate_samples()
    K = 2
    soft_k_means(x, K)
    #K = 2
    #while K < 10:
        #soft_k_means(x, K)
        #K+=1


if __name__ == "__main__":
    main()
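

# Usage sketch (illustrative, not from the original paste): soft_k_means also
# takes a beta argument; a larger beta makes the responsibilities computed in
# cluster_responsibilities closer to hard 0/1 assignments. For example:
#
#     x = generate_samples()
#     soft_k_means(x, K=2, beta=3.0)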