Untitled

import pandas as pd
import pickle
from sklearn.cluster import KMeans

# Train a 12-cluster KMeans model on columns A, B and C of the combined input
# files and persist the fitted model with pickle.
#
# `ifiles` (iterable of HDF5 paths) and `filename` (output pickle path) are not
# defined in this section; they must be provided before it runs.
frames = [pd.read_hdf(fin) for fin in ifiles]
data = pd.concat(frames, ignore_index=True, axis=0)
# Discard every row that has a missing value in any column before clustering.
data.dropna(inplace=True)

k = 12
# pd.concat expects ONE sequence of objects as its first argument. Passing the
# Series as separate positional arguments puts data['B'] in the `axis` slot and
# raises TypeError, so they are wrapped in a list here.
x = pd.concat([data['A'], data['B'], data['C']], axis=1, keys=['A', 'B', 'C'])
# NOTE(review): `n_jobs` on KMeans was deprecated in scikit-learn 0.23 and
# removed in 1.0; drop the argument when running on a newer release.
model = KMeans(n_clusters=k, random_state=0, n_jobs=-2)
model.fit(x)

# Use a context manager so the file is flushed and closed even if dump() fails.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

array([[-2.26732099,  0.24895614,  2.34840191],
       [-2.26732099,  0.22270912,  1.88942378],
       [-1.99246557,  0.04154312,  2.63458941],
       ...,
       [-4.29596287,  1.97036309, -0.22767511],
       [-4.26055474,  1.72347591, -0.18185197],
       [-4.15980382,  1.73176239, -0.30781225]])

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=12, n_init=10, n_jobs=-2, precompute_distances='auto',
    random_state=0, tol=0.0001, verbose=0)

# Reload a previously pickled model and assign each row of `x` to a cluster.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
# NOTE(review): the path is hard-coded to 'test.pkl' while the model is saved
# via `filename` -- confirm both refer to the same file.
with open('test.pkl', 'rb') as model_file:  # closed automatically after load
    modelnew = pickle.load(model_file)
modelnew.predict(x)