daily pastebin goal
55%
SHARE
TWEET

Untitled

a guest Apr 15th, 2018 20 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# This code classifies heart disease by labelling the predicted values into
# classes: 0 for absence and 1 to 4 for presence of disease. It also checks
# the model's performance by comparing it against other classifiers.
  4.  
# Standard library
from itertools import cycle

# Third-party
from numpy import genfromtxt
import numpy as np
import matplotlib
matplotlib.use('Agg')  # non-interactive backend; must be set before importing pyplot
import matplotlib.pyplot as plt
import pylab as pl
from sklearn import cross_validation
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
  16.  
  17. #Loading and pruning the data
  18. dataset = genfromtxt('cleveland_data.csv',dtype = float, delimiter=',')
  19. #print dataset
  20. X = dataset[:,0:12] #Feature Set
  21. y = dataset[:,13]   #Label Set
  22.  
  23. #Method to plot the graph for reduced Dimesions
  24. def plot_2D(data, target, target_names):
  25.      colors = cycle('rgbcmykw')
  26.      target_ids = list(range(len(target_names)))
  27.      plt.figure()
  28.      for i, c, label in zip(target_ids, colors, target_names):
  29.          plt.scatter(data[target == i, 0], data[target == i, 1],
  30.                     c=c, label=label)
  31.      plt.legend()
  32.      plt.savefig('Reduced_PCA_Graph')
  33.  
  34. # Classifying the data using a Linear SVM and predicting the probability of disease belonging to a particular class
  35. modelSVM = LinearSVC(C=0.001)
  36. pca = PCA(n_components=5, whiten=True).fit(X)
  37. X_new = pca.transform(X)
  38.  
  39. # calling plot_2D
  40. target_names = ['0','1','2','3','4']
  41. plot_2D(X_new, y, target_names)
  42.  
  43. #Applying cross validation on the training and test set for validating our Linear SVM Model
  44. X_train, X_test, y_train, y_test = cross_validation.train_test_split(X_new, y, test_size=0.4, train_size=0.6, random_state=0)
  45. modelSVM = modelSVM.fit(X_train, y_train)
  46. print("Testing  Linear SVC values using Split")
  47. print(modelSVM.score(X_test, y_test))
  48.  
  49. # prediction score based on X_new
  50. modelSVMRaw = LinearSVC(C=0.001)
  51. modelSVMRaw = modelSVMRaw.fit(X_new, y)
  52. cnt = 0
  53. for i in modelSVMRaw.predict(X_new):
  54.     if i == y[i]:
  55.        cnt = cnt+1
  56. print("Score without any split")
  57. print(float(cnt)/303)
  58.  
  59.  
  60. # printing the Likelihood of disease belonging to a particular class
  61. # predicting the outcome
  62. count0 = 0
  63. count1 = 0
  64. count2 = 0
  65. count3 = 0
  66. count4 = 0
  67. for i in modelSVM.predict(X_new):
  68.         if i == 0:
  69.                 count0 = count0+1;
  70.         elif i == 1:
  71.                 count1 = count1+1;
  72.         elif i == 2:
  73.                 count2 = count2+1;
  74.         elif i == 3:
  75.                 count3 = count3+1;
  76.         elif modelSVM.predict(i) ==4:
  77.                 count4 = count4+1
  78. total = count0+count1+count2+count3+count4
  79. #Predicting the Likelihood
  80. print("The prediction is as follows:")
  81. print(" Likelihood of belonging to Class 0 is", float(count0)/total)
  82. print(" Likelihood of belonging to Class 1 is", float(count1)/total)
  83. print(" Likelihood of belonging to Class 2 is", float(count2)/total)
  84. print(" Likelihood of belonging to Class 3 is", float(count3)/total)
  85. print(" Likelihood of belonging to Class 4 is", float(count4)/total)
  86.  
  87.  
  88. #Applying the Principal Component Analysis on the data features
  89. modelSVM2 = SVC(C=0.001,kernel='rbf')
  90.  
  91. #Applying cross validation on the training and test set for validating our Linear SVM Model
  92. X_train1, X_test1, y_train1, y_test1 = cross_validation.train_test_split(X_new, y, test_size=0.4, train_size=0.6, random_state=0)
  93. modelSVM2 = modelSVM2.fit(X_train1, y_train1)
  94. print("Testing with RBF using split")
  95. print(modelSVM2.score(X_test1, y_test1))
  96.  
  97. modelSVM2Raw = SVC(C=0.001,kernel='rbf')
  98. modelSVM2Raw = modelSVM2Raw.fit(X_new, y)
  99. cnt1 = 0
  100. for i in modelSVM2Raw.predict(X_new):
  101.         if i == y[i]:
  102.            cnt1 = cnt1+1
  103. print("RBF Score without split")
  104. print(float(cnt1)/303)
  105.  
  106. #Using Stratified K Fold
  107. skf = cross_validation.StratifiedKFold(y, n_folds=5)
  108. for train_index, test_index in skf:
  109.    # print("TRAIN:", train_index, "TEST:", test_index)
  110.     X_train3, X_test3 = X[train_index], X[test_index]
  111.     y_train3, y_test3 = y[train_index], y[test_index]
  112. modelSVM3 = SVC(C=0.001,kernel='rbf')
  113. modelSVM3 = modelSVM3.fit(X_train3, y_train3)
  114. print("Testing using stratified with K folds")
  115. print(modelSVM3.score(X_test3, y_test3))
  116.  
  117. modelSVM3Raw = SVC(C=0.001,kernel='rbf')
  118. modelSVM3Raw = modelSVM3Raw.fit(X_new, y)
  119. cnt2 = 0
  120. for i in modelSVM3Raw.predict(X_new):
  121.         if i == y[i]:
  122.            cnt2 = cnt2+1
  123. print("Stratified K Fold score on X_New")
  124. print(float(cnt2)/303)
RAW Paste Data
Top