SHARE
TWEET

Untitled

a guest Mar 19th, 2019 60 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Thu Mar 14 14:17:49 2019
  4.  
  5. @author: Ricky Hu
  6. """
  7.  
  8. from sklearn.datasets import load_breast_cancer
  9. from sklearn.neighbors import KNeighborsClassifier     #KNN
  10. from sklearn.linear_model import LogisticRegression    #Logistic Regression
  11. from sklearn.tree import DecisionTreeClassifier        #Decision Tree
  12. from sklearn.ensemble import RandomForestClassifier    #Random Forest
  13. from sklearn.neural_network import MLPClassifier       #Neural Network
  14. from sklearn.svm import SVC                            #SVM
  15. from sklearn.model_selection import train_test_split
  16. from sklearn.preprocessing import StandardScaler
  17. from sklearn.tree import export_graphviz
  18. import matplotlib.pylab as plt
  19. import numpy as np
  20. import graphviz
  21.  
  22. %matplotlib inline
  23. #%%
  24. # Load the breast cancer dataset bundled with scikit-learn; later cells read `cancer`
  25. cancer = load_breast_cancer()
  26. print(cancer.keys())  # show which fields the loaded object exposes
  27. print(cancer.DESCR)  # print the dataset's DESCR (description) field
  28.  
  29. #%%
  30. # Print the feature names so the column order of cancer.data can be checked by eye
  31. print(cancer.feature_names)
  32. #%%
  33. # Print the target (class) names so the meaning of the labels can be checked by eye
  34. print(cancer.target_names)
  35. #%%
  36. # Look at the container type and dimensions of the feature matrix
  37. type(cancer.data)  # bare expression (no print): the result is discarded when run as a plain script
  38. cancer.data.shape  # bare expression (no print): only visible when evaluated interactively
  39. #%%
  40. # 2D scatter plot of feature columns 1 and 2 (texture and perimeter, per the original note)
  41. fig = plt.figure(figsize=(8,6))
  42. plt.scatter(cancer.data[:,1], cancer.data[:,2], c=cancer.target)  # c= colours each point by its target label
  43. plt.xlabel(str(cancer.feature_names[1]))  # axis labels come from the names of the plotted columns
  44. plt.ylabel(str(cancer.feature_names[2]))
  45. plt.show()
  46. #%%
  47.  
  48. #----------------Logistic Regression
  49. X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, stratify=cancer.target, random_state=42)
  50.  
  51. log_reg = LogisticRegression()
  52. log_reg.fit(X_train, y_train)
  53.  
  54. print('Accuracy on the training set: {:.3f}'.format(log_reg.score(X_train,y_train)))
  55. print('Accuracy on the training set: {:.3f}'.format(log_reg.score(X_test,y_test)))
  56.  
  57. #%%
  58.  
  59. #----------------- Decision Tree
  60. X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=42)
  61.  
  62. training_accuracy = []
  63. test_accuracy = []
  64.  
  65. max_dep = range(1,15)
  66.  
  67. for md in max_dep:
  68.     tree = DecisionTreeClassifier(max_depth=md,random_state=0)
  69.     tree.fit(X_train,y_train)
  70.     training_accuracy.append(tree.score(X_train, y_train))
  71.     test_accuracy.append(tree.score(X_test, y_test))
  72.  
  73. plt.plot(max_dep,training_accuracy, label='Accuracy of the training set')
  74. plt.plot(neighbors_setting,test_accuracy, label='Accuracy of the test set')
  75. plt.ylabel('Accuracy')
  76. plt.xlabel('Max Depth')
  77. plt.legend()
  78.  
  79. # With a larger max_depth (>5) the model overfits the training data, so the accuracy on the training set becomes higher
  80. # but the accuracy on the test set decreases
  81.  
  82. # other parameters that can be tuned:
  83. # - min_samples_leaf, min_samples_split
  84. # - max_leaf_nodes
  85.  
  86. # by looking at the plot, the best result occurs when max_depth is 3
  87.  
  88. #%%
  89. # Export the decision tree to a Graphviz .dot file (hard-coded, machine-specific output path)
  90. # NOTE(review): `tree` is the last model left by the depth loop (max_depth=14), not the depth-3 tree noted as best - confirm this is intended
  91. export_graphviz(tree, out_file=r"C:\Users\Ricky Hu\Desktop\ml\cancerTree.dot", class_names=['malignant','benign'], feature_names=cancer.feature_names, impurity=False, filled=True)
  92. #%%
  93.  
  94. print('Feature importances: {}'.format(tree.feature_importances_))  # importances of whichever model `tree` currently refers to
  95. type(tree.feature_importances_)  # bare expression (no print): the result is discarded when run as a plain script
  96. #%%
  97.  
  98. # Horizontal bar chart of the tree's feature importances, one bar per feature column
  99. n_feature = cancer.data.shape[1]  # number of columns in the feature matrix
  100. plt.barh(range(n_feature), tree.feature_importances_, align='center')
  101. plt.yticks(np.arange(n_feature), cancer.feature_names)  # label each bar position with its feature name
  102. plt.xlabel('Feature Importance')
  103. plt.ylabel('Feature')
  104. plt.show()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top