# K-Means

def kmeans(X, k, tol=0.001):
    """Cluster 2-D points into ``k`` groups with Lloyd's k-means algorithm.

    Centroids start at uniformly random positions inside the bounding box of
    the data. The algorithm then alternates between assigning every point to
    its nearest centroid and moving each centroid to the mean of its points,
    until no centroid moves by more than ``tol``.

    Args:
        X: 2-D NumPy array indexed as ``X[:, 0]`` / ``X[:, 1]``; only the
            first two columns take part in distance and mean computations.
        k: Number of clusters (must be at least 1).
        tol: Minimum Euclidean displacement for a centroid update to count
            as a change. Defaults to the previously hard-coded ``0.001``.

    Returns:
        A pair ``(C, y)``:
        ``C`` is a list of ``k`` centroids, each an ``(x, y)`` tuple.
        ``y`` is a list with one ``[tuple(point), centroid]`` entry per row
        of ``X``, grouped by cluster index.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``X`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(X) == 0:
        raise ValueError("X must contain at least one point")

    def distance(p1, p2):
        return np.sqrt((p2[0] - p1[0]) ** 2 + (p2[1] - p1[1]) ** 2)

    x_min, x_max = min(X[:, 0]), max(X[:, 0])
    y_min, y_max = min(X[:, 1]), max(X[:, 1])
    # Continuous sampling works for any data range, including ranges narrower
    # than one unit or a degenerate (zero-width) bounding box.
    C = [
        (np.random.uniform(x_min, x_max), np.random.uniform(y_min, y_max))
        for _ in range(k)
    ]

    # Maps cluster index -> points currently assigned to that cluster.
    clusters_points = {index: [] for index in range(k)}
    keep_searching = True
    while keep_searching:
        keep_searching = False

        # Assignments must be rebuilt on every pass; keeping the previous ones
        # would mix stale memberships into the means and duplicate points.
        clusters_points = {index: [] for index in range(k)}
        for data_point in X:
            # On ties, min() keeps the lowest cluster index.
            nearest = min(
                range(k), key=lambda index: distance(C[index], data_point)
            )
            clusters_points[nearest].append(data_point)

        for index, points in clusters_points.items():
            if not points:
                # An empty cluster has no mean; leave its centroid in place.
                continue
            count = len(points)
            candidate = (
                sum(point[0] for point in points) / count,
                sum(point[1] for point in points) / count,
            )
            # Only a displacement above the tolerance counts as a change.
            if distance(C[index], candidate) > tol:
                C[index] = candidate
                keep_searching = True

    y = []
    for index, points in clusters_points.items():
        centroid = C[index]
        y.extend([tuple(point), centroid] for point in points)

    return C, y