Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def kmeans(X, k):
    """Cluster 2-D points into ``k`` groups with iterative k-means.

    Centroids start at uniformly random positions inside the bounding box of
    the data (drawn from NumPy's global random state, so seed it with
    ``np.random.seed`` for reproducible runs). Each pass assigns every point
    to its nearest centroid and then moves each centroid to the mean of its
    points; the loop stops once no centroid moves by more than 0.001.

    Parameters
    ----------
    X : array-like, shape (n_points, 2 or more)
        Points to cluster. Only columns 0 and 1 are used as coordinates.
    k : int
        Number of clusters; must be at least 1.

    Returns
    -------
    C : list of tuple
        The ``k`` final centroids as ``(x, y)`` pairs. A centroid that never
        attracts any point keeps its last position.
    y : list of [tuple, tuple]
        One ``[point, centroid]`` entry per input point, grouped by cluster
        index, where ``point`` is the input row as a tuple and ``centroid``
        is the centroid that point was assigned to.

    Raises
    ------
    ValueError
        If ``X`` contains no points or ``k`` is smaller than 1.
    """
    X = np.asarray(X)
    if X.size == 0:
        raise ValueError("X must contain at least one point")
    if k < 1:
        raise ValueError("k must be at least 1")

    def distance(p1, p2):
        # Euclidean distance on the first two coordinates only.
        return np.sqrt((p2[0] - p1[0]) ** 2 + (p2[1] - p1[1]) ** 2)

    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()
    # uniform() copes with float data and with a zero-width range (all points
    # sharing a coordinate), both of which break an integer randint() draw.
    centroids = [
        (np.random.uniform(x_min, x_max), np.random.uniform(y_min, y_max))
        for _ in range(k)
    ]

    tolerance = 0.001
    moved = True
    while moved:
        moved = False

        # Membership is rebuilt from scratch on every pass; carrying it over
        # would mix stale assignments into the means and duplicate points in
        # the returned labelling.
        members = {index: [] for index in range(k)}
        for point in X:
            distances = [distance(centroid, point) for centroid in centroids]
            # Ties go to the lowest cluster index.
            members[distances.index(min(distances))].append(point)

        for index, points in members.items():
            if not points:
                # An empty cluster has no mean; leave its centroid in place
                # instead of dividing by zero.
                continue
            mean = (
                sum(point[0] for point in points) / len(points),
                sum(point[1] for point in points) / len(points),
            )
            # Only move the centroid when the shift is significant, so the
            # loop terminates once every cluster has settled.
            if distance(centroids[index], mean) > tolerance:
                centroids[index] = mean
                moved = True

    y = []
    for index, points in members.items():
        for point in points:
            y.append([tuple(point), centroids[index]])
    return centroids, y
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement