Advertisement
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
"""Augment the iris features with K-Means centroid distances, then classify.

A K-Means model is fitted on the training features only. The distance from
each sample to every cluster centre is appended as extra columns
(``dist1``, ``dist2``, ...) and a decision tree is trained on the augmented
frame. The test split goes through the same fitted K-Means model before
prediction.
"""
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

N_CLUSTERS = 3
# Fixed seed so the split, the clustering and the tree are repeatable.
RANDOM_STATE = 0


def add_cluster_distances(features, kmeans):
    """Return a copy of *features* with one distance column per cluster.

    Args:
        features: DataFrame holding exactly the columns *kmeans* was
            fitted on (no previously added ``dist*`` columns).
        kmeans: A fitted ``KMeans`` estimator.

    Returns:
        A new DataFrame with ``dist1`` .. ``distK`` appended, where K is the
        number of columns returned by ``kmeans.transform``. The input frame
        is left unmodified.
    """
    distances = kmeans.transform(features)
    dist_columns = {
        f"dist{i + 1}": distances[:, i] for i in range(distances.shape[1])
    }
    # ``assign`` builds a new frame instead of writing into the split result,
    # which avoids pandas' SettingWithCopyWarning.
    return features.assign(**dist_columns)


data = load_iris()
X = pd.DataFrame(data['data'], columns=data['feature_names'])
y = data['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Fit the clustering on the raw training features only, so nothing from the
# test split influences the centroids.
# n_init is set explicitly because its default differs between
# scikit-learn releases.
km = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE)
km.fit(X_train)

X_train = add_cluster_distances(X_train, km)

clf = DecisionTreeClassifier(random_state=RANDOM_STATE)
clf.fit(X_train, y_train)

# new data
X_test = add_cluster_distances(X_test, km)
y_pred = clf.predict(X_test)
print(f"test accuracy: {np.mean(y_pred == y_test):.3f}")
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement