Advertisement
Guest User

Untitled

a guest
Jun 26th, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.19 KB | None | 0 0
  1. def distance(u, v):
  2. """
  3. Calculates Euclidean distance between two point
  4. distance = square_root( sum(u_i - v_i)^2 )
  5.  
  6. u: [float, float], point1
  7. v: [float, float], point2
  8. """
  9. sum_ = sum((u[i] - v[i]) ** 2 for i in range(len(u)))
  10. return sum_ ** (1 / 2)
  11.  
  12.  
  13. def get_closer(target, *args):
  14. """
  15. Return the closest point (from points in `args`) to target
  16.  
  17. target: [float], target point
  18. *args: [[float]], list of points
  19. """
  20. min_distance = float("inf")
  21. for point in args:
  22. d = distance(point, target)
  23. if d < min_distance:
  24. min_distance = d
  25. closer = point
  26. return closer
  27.  
  28.  
  29. def get_center(cluster):
  30. """
  31. Calculates the centroid point for `cluster`
  32.  
  33. cluster: [[float]], list of the points in cluster
  34. """
  35. center = []
  36. n = len(cluster)
  37. for i in range(len(cluster[0])):
  38. c = sum(p[i] for p in cluster) / n
  39. center.append(round(c, 1))
  40. return center
  41.  
  42.  
  43. def k_means(data, k=2, *centers):
  44. """
  45. Recursive k_means algorithm
  46.  
  47. data: [[float]], data points to consider for clustering
  48. k: int, number of clusters
  49. centers: [[float]], optiona - initial centroids
  50. """
  51. centers = list(centers) if centers else [data[i] for i in range(k)]
  52. clusters = [[] for _ in range(k)]
  53. for point in data:
  54. nearest = get_closer(point, *centers)
  55. nearest_cluster_index = centers.index(nearest)
  56. clusters[nearest_cluster_index].append(point)
  57.  
  58. new_centers = [get_center(cluster) for cluster in clusters]
  59. if centers == new_centers: return clusters, centers
  60. return k_means(data, k, *new_centers)
  61.  
  62.  
  63. # -- Test
  64. >>> weights = [74, 77, 81, 76, 80, 91, 88, 93, 88, 92]
  65. >>> heights = [179, 182, 181, 175, 174, 182, 178, 178, 174, 173]
  66. >>> data = [list(point) for point in zip(weights, heights)]
  67. >>> data
  68. [[74, 179], [77, 182], [81, 181], [76, 175], [80, 174], [91, 182], [88, 178], [93, 178], [88, 174], [92, 173]]
  69.  
  70. >>> clusters, centroids = k_means(data)
  71. >>> for c in clusters: print(c)
  72. [[74, 179], [77, 182], [81, 181], [76, 175], [80, 174]]
  73. [[91, 182], [88, 178], [93, 178], [88, 174], [92, 173]]
  74. >>> for c in centroids: print(c)
  75. [77.6, 178.2]
  76. [90.4, 177.0]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement