Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from copy import deepcopy
# Load the data set; expects 'iris.csv' in the current working directory.
data = pd.read_csv('iris.csv')
# Bare expression kept from the original paste: it only displays the first
# rows in a notebook/REPL and has no visible effect when run as a script.
data.head()
def distance(v1, v2, ax=1):
    """Return the Euclidean (L2) norm of ``v1 - v2``.

    Args:
        v1: Array-like of coordinates.
        v2: Array-like of coordinates; must broadcast against ``v1``.
        ax: Axis passed to ``np.linalg.norm``. The default ``1`` yields one
            distance per row of a 2-D difference; ``None`` collapses the
            whole difference into a single scalar.

    Returns:
        A NumPy array of distances, or a scalar when ``ax`` is ``None``.
    """
    # asarray lets plain lists/tuples be subtracted; ndarrays pass through.
    return np.linalg.norm(np.asarray(v1) - np.asarray(v2), axis=ax)
class KMeans:
    """Plain k-means clustering (Lloyd's algorithm) on NumPy arrays.

    After ``fit``, ``clusters`` holds the integer cluster index of every
    training row and ``centroids`` holds one cluster centre per row.
    Both attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(self, n_clusters, max_iter=300):
        """Store the hyper-parameters.

        Args:
            n_clusters: Number of clusters (and centroids) to find.
            max_iter: Upper bound on the assign/update rounds run by ``fit``.
        """
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.clusters = None
        self.centroids = None

    @staticmethod
    def _nearest(points, centroids):
        """Return, for each row of ``points``, the index of its closest centroid."""
        # (n, 1, d) - (1, k, d) broadcasts to every point/centroid pair.
        gaps = points[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        # argmin resolves ties in favour of the lowest centroid index.
        return np.linalg.norm(gaps, axis=2).argmin(axis=1)

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result on the instance.

        Initial centroids are ``n_clusters`` distinct rows of ``X`` drawn
        with ``np.random.choice``. Iteration stops when the centroids no
        longer move or after ``max_iter`` rounds, whichever comes first.

        Args:
            X: 2-D array-like with one sample per row.

        Raises:
            ValueError: If ``X`` is not 2-D, or (from ``np.random.choice``)
                if ``n_clusters`` exceeds the number of rows.
        """
        # Work in float so that cluster means are not truncated when the
        # caller passes integer data.
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")

        n_samples = X.shape[0]
        seeds = np.random.choice(n_samples, self.n_clusters, replace=False)
        centroids = X[seeds]
        labels = np.zeros(n_samples, dtype=int)

        # Bounded loop: guarantees termination even if the data never settles
        # (for example when X contains NaN).
        for _ in range(self.max_iter):
            labels = self._nearest(X, centroids)

            updated = centroids.copy()
            for k in range(self.n_clusters):
                members = X[labels == k]
                # An empty cluster keeps its previous centre; taking the mean
                # of zero rows would yield NaN and poison every later round.
                if len(members):
                    updated[k] = members.mean(axis=0)

            converged = np.array_equal(updated, centroids)
            centroids = updated
            if converged:
                break

        self.clusters = labels.astype('int')
        self.centroids = centroids

    def predict(self, y):
        """Assign each sample in ``y`` to its nearest fitted centroid.

        Args:
            y: 2-D array-like of samples (one per row), or a single 1-D
                sample, with the same number of features used in ``fit``.

        Returns:
            1-D integer array of cluster indices, one per sample.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if getattr(self, "centroids", None) is None:
            raise RuntimeError("KMeans.predict() called before fit()")
        points = np.atleast_2d(np.asarray(y, dtype=float))
        return self._nearest(points, self.centroids).astype('int')
# Cluster the rows of `data` into three groups, using every column except 'Name'.
model = KMeans(3)
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
X = data.loc[:, data.columns != 'Name'].to_numpy()
print(X)
model.fit(X)
print(model.clusters)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement