Advertisement
Guest User

Untitled

a guest
May 22nd, 2019
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.21 KB | None | 0 0
  1. from sklearn.datasets import make_blobs
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import random
  5. import math
  6.  
  7. def plot_k_means(x, r, k, centers):
  8. #random_colors = np.random.random((k, 3))
  9. #colors = r.dot(random_colors)
  10. #colors = ('black')
  11. print("Probabilities = ", r[:40])
  12. for center in centers:
  13. print(center)
  14. plt.plot(center[0], center[1],"ro")
  15. plt.scatter(x[:,0], x[:,1])
  16. plt.show()
  17.  
  18.  
  19. def initialize_centers(x, num_k):
  20. N, D = x.shape
  21. centers = np.zeros((num_k, D))
  22. used_idx = []
  23. for k in range(num_k):
  24. idx = np.random.choice(N)
  25. while idx in used_idx:
  26. idx = np.random.choice(N)
  27. used_idx.append(idx)
  28. centers[k] = x[idx]
  29. return centers
  30.  
  31. def update_centers(x, r, K):
  32. N, D = x.shape
  33. centers = np.zeros((K, D))
  34. for k in range(K):
  35. centers[k] = r[:, k].dot(x) / r[:, k].sum()
  36. return centers
  37.  
  38. def square_dist(a, b):
  39. return (a - b) ** 2
  40.  
  41. def distance(p0, p1):
  42. return math.sqrt((p0[0] - p1[0])**2 + (p0[1] - p1[1])**2)
  43.  
  44. def cost_func(x, r, centers, K):
  45. cost = 0
  46. for k in range(K):
  47. norm = np.linalg.norm(x - centers[k], 2)
  48. cost += (norm * np.expand_dims(r[:, k], axis=1) ).sum()
  49. return cost
  50.  
  51.  
  52. def cluster_responsibilities(centers, x, beta):
  53. N, _ = x.shape
  54. K, D = centers.shape
  55. R = np.zeros((N, K))
  56. for n in range(N):
  57. R[n] = np.exp(-beta * np.linalg.norm(centers - x[n], 2, axis=1))
  58. R /= R.sum(axis=1, keepdims=True)
  59.  
  60. return R
  61.  
  62. def soft_k_means(x, K, beta=1.):
  63. centers = initialize_centers(x, K)
  64. merge = False
  65. print("centers after initialize: ",centers)
  66. prev_cost = 0
  67. cost = 1
  68. distance_threshold = (1.0 / K)
  69. cost_threshold = 1e-2
  70. while True:
  71. #for _ in range(max_iters):
  72. r = cluster_responsibilities(centers, x, beta)
  73. centers = update_centers(x, r, K)
  74. idx = 0
  75. if merge:
  76. while idx < len(centers):
  77. j = 0
  78. while j < len(centers):
  79. dist = distance(centers[idx], centers[j])
  80. print 'Calculating Distance Between: ' + str(centers[idx]) + ' & ' + str(centers[j])
  81. print 'Distance: ' + str(dist)
  82. if dist <= distance_threshold and idx != j:
  83. print 'Removing!!'
  84. centers[idx] = [(centers[idx][0] + centers[j][0]) / 2, (centers[idx][1] + centers[j][1]) / 2]
  85. centers = np.delete(centers, j, 0)
  86. idx = -1
  87. break
  88. else:
  89. j += 1
  90. idx += 1
  91. K = len(centers)
  92. r = cluster_responsibilities(centers, x, beta)
  93. centers = update_centers(x, r, K)
  94.  
  95. print("Centers after update are: ", centers)
  96. cost = cost_func(x, r, centers, K)
  97. print("distance the centers moved= ", np.abs(cost - prev_cost))
  98. if np.abs(cost - prev_cost) < cost_threshold:
  99. print 'Breaking!! Final K:' + str(K)
  100. break
  101. prev_cost = cost
  102. K *= 2
  103. split_centers = np.zeros((K, x.shape[1]))
  104. print ("new centers: ", split_centers)
  105. for i, center in enumerate(centers):
  106. split_centers[i * 2] = center
  107. split_centers[(i * 2) + 1] = [center[0] + (random.randint(0, 100) * 0.01), center[1] + (random.randint(0, 100) * 0.01)]
  108. print("new center = ", split_centers)
  109. centers = split_centers
  110. merge = True
  111. print("Final centers before plot= ", centers)
  112. plot_k_means(x, r, K, centers)
  113.  
  114.  
  115. def generate_samples(std=.5, dim=2, dist=4):
  116. x, B2 = make_blobs(n_samples=100, centers=4, cluster_std=.1, random_state=12)
  117. #mu0 = np.array([0,0])
  118. #mu1 = np.array([dist, dist])
  119. #mu2 = np.array([0, dist])
  120. # num samps per class
  121. #Nc = 10
  122. #x0 = np.random.randn(Nc, dim) * std + mu0
  123. #x1 = np.random.randn(Nc, dim) * std + mu1
  124. #x2 = np.random.randn(Nc, dim) * std + mu2
  125. #x = np.concatenate((x0, x1, x2), axis=0)
  126. return x
  127.  
  128.  
  129. def main():
  130. x = generate_samples()
  131. K=2
  132. soft_k_means(x, K)
  133. #K = 2
  134. #while K < 10:
  135. #soft_k_means(x, K)
  136. #K+=1
  137.  
  138. if __name__ == "__main__":
  139. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement