Advertisement
ayush3504

Python DT Classwork (Wisconsin BC)

Feb 9th, 2016
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.84 KB | None | 0 0
  # Decision-tree classification of the UCI "Breast Cancer Wisconsin (Original)" data.
#
# Pipeline: download the raw data, impute missing values with column medians,
# fit an entropy-based decision tree on a 70/30 train/test split, export the
# tree for Graphviz, and print accuracy plus the confusion matrix.
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the old module path
    from sklearn.cross_validation import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing

DATA_URL = ('https://archive.ics.uci.edu/ml/machine-learning-databases/'
            'breast-cancer-wisconsin/breast-cancer-wisconsin.data')

# Column meanings by position, taken from the dataset's .names file
# (the data file itself has no header row).
COLUMN_NAMES = [
    'sample id',
    'clump thickness',
    'uniformity of cell size',
    'uniformity of cell shape',
    'marginal adhesion',
    'single epithelial cell size',
    'bare nuclei',
    'bland chromatin',
    'normal nucleoli',
    'mitoses',
    'class',
]
ID_COLUMN = 0
FEATURE_COLUMNS = list(range(2, 10))  # columns 2..9 inclusive
TARGET_COLUMN = 10

# Pull in breast cancer data. Missing entries are written as '?' in the file;
# parsing them as NaN keeps every column numeric and lets them be imputed.
bc = pd.read_csv(DATA_URL, header=None, na_values='?')

# Impute column medians to address NaN. Replacing '?' with 0 instead would
# leave nothing to impute and would inject an out-of-range value.
bc = bc.fillna(bc.median())

# Review head of data (only displayed in an interactive session)
bc.head()

# Drop the sample-id column; it is an identifier, not a feature
bc = bc.drop(ID_COLUMN, axis=1)

# Create training and testing sets
X = bc.loc[:, FEATURE_COLUMNS]
Y = bc.loc[:, TARGET_COLUMN]
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0)

# Here, we define the parameters of our tree
tree = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=0)

# We then fit the tree to our training data
tree.fit(X_train, Y_train)

# Now we visualize our tree. The labels are derived from FEATURE_COLUMNS so
# there is always exactly one name per fitted feature, in the same order.
feature_names = [COLUMN_NAMES[col] for col in FEATURE_COLUMNS]
export_graphviz(tree, out_file='wisc.dot', feature_names=feature_names)

# At this point go to your terminal and use the dot command to convert the
# .dot file to a .png file, e.g.:  dot -Tpng wisc.dot -o wisc.png

# Let's make a prediction
y_pred = tree.predict(X_test)

# Now we calculate our accuracy and create a confusion matrix of our results
print('Accuracy: %.2f' % accuracy_score(Y_test, y_pred))
confmat = confusion_matrix(y_true=Y_test, y_pred=y_pred)
print(confmat)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement