Advertisement
Guest User

Untitled

a guest
Apr 19th, 2018
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.14 KB | None | 0 0
  1. import math
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from random import randint
  5.  
  6. def distance(cluster, point):
  7.     return math.sqrt( ((cluster[0] - point[0]) ** 2) + ((cluster[1] - point[1]) ** 2) )
  8.  
  9. def distances_from_center(center, data):
  10.     distances = []
  11.     for p in dataset:
  12.         # voor elk punt de afstand tot het huidige cluster centrum
  13.         distances.append(distance(c, p))
  14.     return distances
  15.  
  16. def calculate_new_center(cluster):
  17.     new_center_x = sum([row[0] for row in cluster]) / len(cluster)
  18.     new_center_y = sum([row[1] for row in cluster]) / len(cluster)
  19.     return [new_center_x, new_center_y]
  20.  
  21. dataset = []
  22. for i in range(0,100000):
  23.     dataset.append([randint(0,500), randint(0,500)])
  24. cluster_count = 5
  25. cluster_centers = []
  26. clusters = []
  27.  
  28. for i in range(cluster_count):
  29.     cluster_centers.append(dataset[i])
  30.     clusters.append([])
  31.  
  32. done = False
  33.  
  34. last_distance_matrix = []
  35. while not done:
  36.     distance_matrix = []
  37.     for i in range(0, len(clusters)):
  38.         clusters[i] = []
  39.  
  40.     # Voor elke cluster centrum de afstanden berekenen
  41.     # Dit kan in parallel
  42.     for c in cluster_centers:
  43.         distance_matrix.append(distances_from_center(c, dataset))
  44.  
  45.     # Distance_matrix omzetten naar een numpy array
  46.     distance_matrix = np.array(distance_matrix)
  47.  
  48.     # Datapunten toewijzen aan cluster_centers
  49.     # Dit kan in parallel
  50.     for p in range(0, len(distance_matrix[0])):
  51.         min_index = np.argmin(distance_matrix[:, p])
  52.         clusters[min_index].append(dataset[p])
  53.  
  54.     # Nieuwe cluster_centers berekenen
  55.     # Kan in parallel
  56.     for x in range(0, len(cluster_centers)):
  57.         cluster_centers[x] =  calculate_new_center(clusters[x])
  58.  
  59.     if np.all(distance_matrix == last_distance_matrix):
  60.         print("done")
  61.         done = True
  62.     else:
  63.         print("not done")
  64.         last_distance_matrix = distance_matrix
  65. for c in clusters:
  66.     x_data = [r[0] for r in c]
  67.     y_data = [r[1] for r in c]
  68.     plt.scatter(x_data, y_data)
  69.  
  70. x_clusters = [r[0] for r in cluster_centers]
  71. y_clusters = [r[1] for r in cluster_centers]
  72. plt.scatter(x_clusters, y_clusters)
  73. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement