Advertisement
Moinak

Assignment_5_main.py

Feb 19th, 2020
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.20 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.model_selection import KFold
  4. from sklearn.tree import DecisionTreeClassifier
  5. from sklearn import model_selection,preprocessing, neighbors,metrics
  6. from sklearn.utils import shuffle
  7. from sklearn.neighbors import KNeighborsClassifier
  8. import random
  9. from sklearn.metrics import accuracy_score
  10. import matplotlib.pyplot as plt
  11. import testing,training
  12. import os
  13. import numpy as np
  14.  
  15.  
  16. train_X =[]
  17. test_X =[]
  18. train_y =[]
  19. test_y =[]
  20. max_neighbors =15
  21.  
  22. # creating a np array for dtc-->Decision tree classifier and knn .
  23. np_dtc = np.array([])
  24. np_knn = np.array([])
  25.  
  26. os.mkdir('dtc')
  27. os.mkdir('knn')
  28. os.mkdir('accuracy')
  29. os.mkdir('train_csv')
  30. os.mkdir('test_csv')
  31.  
  32. data_frame = pd.read_table(r"iris.data", sep=",", header=None, names=['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'class'])
  33. data_frame.head()
  34.  
  35. # Converted the last label as classes
  36. data_frame['class'] = pd.Categorical(data_frame['class'])
  37. data_frame['class'] = data_frame['class'].cat.codes
  38.  
  39. k_fold = model_selection.KFold(n_splits=5)
  40.  
  41.  
  42. X = data_frame[data_frame.columns[:-1]].values
  43. y = data_frame['class'].values
  44.  
  45.  
  46. # Shuffling the X and y before splitting
  47. X, y = shuffle(X, y, random_state = 999)
  48.  
  49. # Spitting the test set into 5 cross valdiation
  50. for i, j in k_fold.split(data_frame):
  51.     train_X.append(X[i])
  52.     test_X.append(X[j])
  53.     train_y.append(y[i])
  54.     test_y.append(y[j])
  55.     print(train_X)
  56.     print(train_y)
  57.  
  58.  
  59. for i in range(0,5):
  60.     np.savetxt("train_csv/train_X_"+str(i)+".csv", train_X[i], delimiter=",")
  61.     np.savetxt("train_csv/train_y_"+str(i)+".csv", train_y[i], delimiter=",")    
  62.     np.savetxt("test_csv/test_X_"+str(i)+".csv", test_X[i], delimiter=",")    
  63.     np.savetxt("test_csv/test_y_"+str(i)+".csv", test_y[i], delimiter=",")    
  64.  
  65. acc_dtc = np.array([])
  66. acc_knn = np.array([])
  67.  
  68.  
  69. for i in range(5):
  70. #     This function will train and save the model for dtc in txt format in dtc directory
  71.     training.create_dtc(i)
  72.  
  73. #     This function will train and save the model for knn for different values of k in txt format in knn directory
  74.     training.create_knn(i,max_neighbors)      
  75.  
  76.     a = testing.run_dtc(i)
  77.     print("Decision Tree Accuracy"+str(a))
  78.     acc_dtc = np.append(acc_dtc,a)
  79.    
  80.     b = testing.run_knn(i,max_neighbors)
  81.     print("KNN Accuracy"+str(b))
  82.     acc_knn = np.append(acc_knn,b)      
  83.  
  84.  
  85.  
  86.  
  87. # reshaping in a matrix
  88. acc_knn = acc_knn.reshape(int(5),max_neighbors)
  89.  
  90. # average mean of k= 0-max_neighbours for all 5 fold cross sets
  91. mean_knn_accuracy = np.mean(acc_knn,axis=0)
  92.  
  93.  
  94.  
  95.  
  96. # Mean of dtc accuracies
  97. #  Accuracy may differ due to shuffling
  98. np.mean(acc_dtc)
  99.  
  100.  
  101.  
  102.  
  103. plt.bar([i+1 for i in range(0,max_neighbors)], mean_knn_accuracy)
  104. plt.ylim([0.9,1])
  105. plt.xlabel("Value of K")
  106. plt.ylabel("Mean Accuracy of 5 fold")
  107. plt.title("Mean Accuracy vs K")
  108. plt.show()
  109.  
  110.  
  111.  
  112.  
  113. Classifier = ('KNN', 'Decision Tree Classifier')
  114. y_pos = np.arange(len(Classifier))
  115. plt.bar(np.arange(2), [mean_knn_accuracy[7], np.mean(acc_dtc)])
  116. plt.ylim([0.9,1])
  117. plt.title("k-NN and Decision tree accuracy comparison")
  118. plt.ylabel("Accuracy Score")
  119. plt.xlabel("Classifiers Name")
  120. plt.xticks(y_pos, Classifier)
  121. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement