Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Cluster the 'px' / 'pz' columns of a pitches CSV with mini-batch k-means,
# print the fitted labels and centroids, then classify a few sample points.
import numpy as np
import pandas as pd
from sklearn.cluster import MiniBatchKMeans
from sklearn.model_selection import train_test_split  # not used by the script below

pitch_data = pd.read_csv('/Users/juliekent/PycharmProjects/clusters/pitches.csv')

# Each coordinate is rounded to 2 decimals and has its own missing rows removed.
x = pitch_data['px'].round(2).dropna()
z = pitch_data['pz'].round(2).dropna()

# x and z drop NaN independently, so their indexes may differ. The default
# (outer) concat would re-introduce NaN for any row missing only one of the
# two coordinates, and MiniBatchKMeans.fit rejects NaN input. An inner join
# keeps only the rows that have both values.
combined = pd.concat([x, z], axis=1, join='inner')
print(combined)

# No random_state is passed, so cluster numbering and centroids can vary
# from run to run.
kmeans = MiniBatchKMeans(n_clusters=3, batch_size=1000)
kmeans.fit(combined)

# cluster assignments
print(f'{kmeans.labels_!r}\n')

# centroids
print(f'{kmeans.cluster_centers_!r}\n')

new_obs = np.array([(0.5, 1.2), (0.42, 2.5), (-0.19, 2.0)])

# predict clusters
# The model was fitted on a DataFrame, so the new observations are wrapped in
# a DataFrame with the same column names; passing the bare array makes
# scikit-learn warn about missing feature names.
new_obs_frame = pd.DataFrame(new_obs, columns=combined.columns)
print(f'{kmeans.predict(new_obs_frame)!r}\n')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement