Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import math
from random import randint

import matplotlib.pyplot as plt
import numpy as np
def distance(cluster, point):
    """Return the Euclidean distance between a cluster centre and a point.

    Only the first two coordinates (x, y) of each argument are used.

    Args:
        cluster: Indexable whose items 0 and 1 are the centre's x and y.
        point: Indexable whose items 0 and 1 are the point's x and y.

    Returns:
        The straight-line distance between the two positions as a float.
    """
    # math.hypot avoids the intermediate overflow that squaring very large
    # coordinates by hand would cause.
    return math.hypot(cluster[0] - point[0], cluster[1] - point[1])
def distances_from_center(center, data):
    """Return the distance from ``center`` to every point in ``data``.

    Args:
        center: The cluster centre, passed as the first argument to
            ``distance``.
        data: Iterable of points, each passed as the second argument to
            ``distance``.

    Returns:
        A list with one distance per point, in the iteration order of
        ``data``.
    """
    # Work on the arguments rather than on module-level names, so the
    # function gives the right answer for whatever centre and data set the
    # caller passes in.
    return [distance(center, point) for point in data]
def calculate_new_center(cluster, fallback=None):
    """Return the centroid (mean x, mean y) of the points in a cluster.

    Args:
        cluster: Sized collection of points; items 0 and 1 of every point
            are read as its x and y coordinates.
        fallback: Optional centre returned (as a new ``[x, y]`` list) when
            ``cluster`` is empty. Defaults to ``None``, meaning no fallback.

    Returns:
        A two-element list ``[mean_x, mean_y]``.

    Raises:
        ZeroDivisionError: If ``cluster`` is empty and no ``fallback`` is
            given.
    """
    size = len(cluster)
    if size == 0 and fallback is not None:
        # An empty cluster has no mean; hand back the caller's fallback.
        return [fallback[0], fallback[1]]
    mean_x = sum(row[0] for row in cluster) / size
    mean_y = sum(row[1] for row in cluster) / size
    return [mean_x, mean_y]
# Problem size, named so the experiment can be resized in one place.
point_count = 100000
coordinate_max = 500
cluster_count = 5

# Random 2-D points with integer coordinates in [0, coordinate_max].
dataset = [
    [randint(0, coordinate_max), randint(0, coordinate_max)]
    for _ in range(point_count)
]

# Seed the centres with the first `cluster_count` points. They are copied so
# that a centre never shares its list object with a dataset row.
cluster_centers = [list(point) for point in dataset[:cluster_count]]

# One (initially empty) member list per cluster centre.
clusters = [[] for _ in range(cluster_count)]

done = False
# None means "no previous iteration yet"; it never compares equal to a
# distance matrix, so the first pass is always reported as not converged.
last_distance_matrix = None
# Repeat the assign/update cycle until an iteration produces exactly the
# same distance matrix as the one before it.
while not done:
    # Start every pass with empty clusters; membership is rebuilt below.
    clusters = [[] for _ in clusters]

    # For each cluster centre, compute the distance to every data point.
    # This can be done in parallel.
    distance_matrix = []
    for c in cluster_centers:
        distance_matrix.append(distances_from_center(c, dataset))

    # Convert to a NumPy array: one row per centre, one column per point.
    distance_matrix = np.array(distance_matrix)

    # Assign each data point to its nearest cluster centre. A single argmin
    # down the columns gives the winning centre index for every point.
    # This can be done in parallel.
    nearest_center = np.argmin(distance_matrix, axis=0)
    for point, center_index in zip(dataset, nearest_center):
        clusters[center_index].append(point)

    # Compute the new cluster centres.
    # This can be done in parallel.
    for x, members in enumerate(clusters):
        # A centre that attracted no points keeps its position; averaging an
        # empty cluster would divide by zero.
        if members:
            cluster_centers[x] = calculate_new_center(members)

    # np.array_equal returns False (instead of attempting an element-wise
    # comparison) when the shapes differ, which is the case on the first
    # pass, before any previous matrix exists.
    if np.array_equal(distance_matrix, last_distance_matrix):
        print("done")
        done = True
    else:
        print("not done")
        last_distance_matrix = distance_matrix
# Draw every cluster's member points as its own scatter series.
for members in clusters:
    member_xs = [point[0] for point in members]
    member_ys = [point[1] for point in members]
    plt.scatter(member_xs, member_ys)

# Draw the cluster centres as one final series, then display the figure.
center_xs = []
center_ys = []
for center in cluster_centers:
    center_xs.append(center[0])
    center_ys.append(center[1])
plt.scatter(center_xs, center_ys)
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement