Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn only ships the legacy module
    from sklearn.cross_validation import train_test_split
# Location of the breast cancer data pulled in by this script.
DATA_URL = ('https://archive.ics.uci.edu/ml/machine-learning-databases/'
            'breast-cancer-wisconsin/breast-cancer-wisconsin.data')

# Attribute held by each column label once the file is read with header=None.
# NOTE(review): names follow the data set's published attribute list; confirm
# against breast-cancer-wisconsin.names before relying on them.
COLUMN_NAMES = {
    1: 'clump thickness',
    2: 'uniformity of cell size',
    3: 'uniformity of cell shape',
    4: 'marginal adhesion',
    5: 'single epithelial cell size',
    6: 'bare nuclei',
    7: 'bland chromatin',
    8: 'normal nucleoli',
    9: 'mitoses',
}

# Column labels (inclusive range) used as predictors, and the target label.
# NOTE(review): column 1 is left out of the predictors, exactly as the
# original slice did; confirm that this is intended.
FIRST_FEATURE = 2
LAST_FEATURE = 9
CLASS_COLUMN = 10


def load_clean_data(source=DATA_URL):
    """Read the data set and return it cleaned, as an all-float DataFrame.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object) holding header-less, comma-separated rows.

    Returns:
        A DataFrame in which every '?' entry has been replaced by the median
        of its column, every value is a float, and column 0 has been dropped.
        The remaining columns keep their original integer labels.
    """
    # Parse '?' straight to NaN so the affected column stays numeric.
    frame = pd.read_csv(source, header=None, na_values='?')
    # Impute medians to address NaN.
    frame = frame.fillna(frame.median()).astype(float)
    # Drop first column.
    return frame.drop(0, axis=1)


def main():
    """Train a decision tree on the data, export it and report its accuracy.

    Writes the fitted tree to 'wisc.dot' and prints the test-set accuracy
    followed by the confusion matrix.
    """
    from sklearn.tree import export_graphviz

    bc = load_clean_data()
    # Head of the cleaned data, as recorded in the original script:
    #    1  2  3  4  5   6  7  8  9  10
    # 0  5  1  1  1  2   1  3  1  1   2
    # 1  5  4  4  5  7  10  3  2  1   2
    # 2  3  1  1  1  2   2  3  1  1   2
    # 3  6  8  8  1  3   4  3  7  1   2
    # 4  4  1  1  3  2   1  3  1  1   2

    # Create training and testing sets. `.loc` slices by label and includes
    # both end points, matching the removed `.ix` indexer on integer labels.
    X = bc.loc[:, FIRST_FEATURE:LAST_FEATURE]
    Y = bc.loc[:, CLASS_COLUMN]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=0)

    # Here, we define the parameters of our tree.
    tree = DecisionTreeClassifier(
        criterion='entropy', max_depth=5, random_state=0)
    # We then fit the tree to our training data.
    tree.fit(X_train, Y_train)

    # Now we visualize our tree. The names are derived from the selected
    # columns so the list always has one entry per feature.
    export_graphviz(
        tree,
        out_file='wisc.dot',
        feature_names=[COLUMN_NAMES[label] for label in X.columns])
    # At this point go to your terminal and use the dot command to convert
    # your .dot file to a .png file.

    # Let's make a prediction.
    y_pred = tree.predict(X_test)
    # Now we calculate our accuracy and create a confusion matrix of our
    # results.
    print('Accuracy: %.2f' % accuracy_score(Y_test, y_pred))
    confmat = confusion_matrix(y_true=Y_test, y_pred=y_pred)
    print(confmat)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement