Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
69
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
text 0.80 KB | None | 0 0
  # Iris classification using cluster-distance features.
  #
  # Pipeline:
  #   1. Load the iris data set into a DataFrame.
  #   2. Hold out 20% of the rows as a test set.
  #   3. Fit KMeans on the training rows and append each row's distance to
  #      every cluster centre as extra feature columns (dist1, dist2, ...).
  #   4. Train a decision tree on the augmented training features.
  #   5. Apply the same KMeans transform to the test rows and predict.
  import numpy as np
  import pandas as pd
  from sklearn.datasets import load_iris
  from sklearn.cluster import KMeans
  from sklearn.tree import DecisionTreeClassifier
  from sklearn.model_selection import train_test_split

  # Number of KMeans clusters; also the number of distance columns added.
  N_CLUSTERS = 3
  # Fixed seed so the split, the clustering and the tree are reproducible.
  RANDOM_STATE = 0

  data = load_iris()

  X = pd.DataFrame(data['data'], columns=data['feature_names'])
  y = data['target']

  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.2, random_state=RANDOM_STATE
  )
  # The split frames are derived from X; take explicit copies so that adding
  # columns below never writes through to (or warns about) the parent frame,
  # which some pandas versions report as SettingWithCopyWarning.
  X_train = X_train.copy()
  X_test = X_test.copy()

  # n_init is pinned because its default differs between scikit-learn
  # releases, which would otherwise change the fitted centres.
  km = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE)
  km.fit(X_train)

  # Distances must be computed BEFORE the dist columns are appended: KMeans
  # was fitted on the original feature columns only.
  training_dist = km.transform(X_train)
  for i in range(training_dist.shape[1]):
      X_train['dist{}'.format(i + 1)] = training_dist[:, i]

  clf = DecisionTreeClassifier(random_state=RANDOM_STATE)
  clf.fit(X_train, y_train)

  # New data: apply the already-fitted KMeans (no refit) so the test rows get
  # the same distance features, in the same column order, as the training rows.
  test_dist = km.transform(X_test)
  for i in range(test_dist.shape[1]):
      X_test['dist{}'.format(i + 1)] = test_dist[:, i]

  # Keep the predictions instead of discarding them, and show them when the
  # file is run as a script.
  y_pred = clf.predict(X_test)
  print(y_pred)
Advertisement
Add Comment
Please sign in to add a comment
Advertisement