Advertisement
Guest User

Untitled

a guest
Dec 12th, 2019
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.46 KB | None | 0 0
import random
from math import sqrt

import numpy as np
  4.  
  5.  
  6. class Cluster:
  7.  
  8. def __init__(self, center, points):
  9. self.center = center
  10. self.points = points
  11.  
  12.  
  13. class KMeans:
  14.  
  15. def __init__(self, n_clusters, min_diff=1):
  16.  
  17. self.n_clusters = n_clusters
  18. self.min_diff = min_diff
  19.  
  20. self.clusters = []
  21.  
  22. def calculate_center(self, points):
  23. n_dim = len(points[0])
  24. vals = [0.0 for i in range(n_dim)]
  25. for p in points:
  26. for i in range(n_dim):
  27. vals[i] += p[i]
  28. coords = [(v / len(points)) for v in vals]
  29. return coords
  30.  
  31. def assign_points(self, clusters, points):
  32.  
  33. plists = [[] for i in range(self.n_clusters)]
  34.  
  35. for p in points:
  36. smallest_distance = float('inf')
  37.  
  38. for i in range(self.n_clusters):
  39. distance = euclidean(p, clusters[i].center)
  40. if distance < smallest_distance:
  41. smallest_distance = distance
  42. idx = i
  43.  
  44. plists[idx].append(p)
  45.  
  46. return plists
  47.  
  48. def fit(self, points):
  49. random.seed(0)
  50.  
  51. clusters = [Cluster(center=p, points=[p])
  52. for p in random.sample(points, self.n_clusters)]
  53.  
  54. while True:
  55.  
  56. plists = self.assign_points(clusters, points)
  57.  
  58. diff = 0
  59.  
  60. for i in range(self.n_clusters):
  61. if not plists[i]:
  62. continue
  63. old = clusters[i]
  64. center = self.calculate_center(plists[i])
  65. new = Cluster(center, plists[i])
  66. clusters[i] = new
  67. diff = max(diff, euclidean(old.center, new.center))
  68.  
  69. if diff < self.min_diff:
  70. break
  71.  
  72. self.clusters = clusters
  73. return clusters
  74.  
  75. def transform(self, points):
  76.  
  77. transformed_points = []
  78.  
  79. for p in points:
  80. smallest_distance = float('inf')
  81.  
  82. for i in range(self.n_clusters):
  83. distance = euclidean(p, self.clusters[i].center)
  84. if distance < smallest_distance:
  85. smallest_distance = distance
  86. idx = i
  87.  
  88. transformed_points.append(self.clusters[idx].center)
  89.  
  90. return transformed_points
  91.  
  92.  
  93. def euclidean(p, q):
  94. n_dim = len(p)
  95. return sqrt(sum([
  96. (p[i] - q[i]) ** 2 for i in range(n_dim)
  97. ]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement