Advertisement
lancernik

Untitled

Jun 4th, 2019
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.06 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Tue Jun 4 12:23:21 2019
  4.  
  5. @author: lancernik
  6. """
  7.  
  8.  
  9.  
  10.  
  11.  
  12. import numpy as np
  13. import pandas as pd
  14. from sklearn.decomposition import PCA
  15. from sklearn import datasets
  16. from sklearn.preprocessing import StandardScaler
  17.  
  # c) PCA

  # Load the iris data set: feature matrix X and class labels y.
  iris = datasets.load_iris()
  X = iris.data
  y = iris.target

  # Standardize the features before running PCA.
  X = StandardScaler().fit_transform(X)

  # Project the standardized features onto two principal components.
  pca = PCA(n_components=2)
  principalComponents = pca.fit_transform(X)
  x2 = pd.DataFrame(data=principalComponents, columns=['1', '2'])

  # Accumulate the explained-variance ratios until they exceed 90%.
  # Iterate over the ratios themselves instead of a hard-coded range(4):
  # only n_components=2 ratios exist, so indexing positions 2 and 3 would
  # raise IndexError whenever the first two components stay at or below 90%.
  total_variance = 0
  for ratio in pca.explained_variance_ratio_:
      total_variance += ratio
      if total_variance > 0.90:
          break
  print(total_variance)
  40.  
  41.  
  42.  
  43.  
  44. # SVM po redukcji wymiarowosci
  45.  
  46. from sklearn import svm
  47. import matplotlib.pyplot as plt
  48. from sklearn.model_selection import train_test_split
  49. from sklearn.metrics import classification_report
  50. from sklearn.metrics import accuracy_score
  51. from sklearn import metrics
  52. import seaborn as sns
  53.  
  54.  
  # RBF classifier fitted on the whole PCA-reduced data set.
  # It is not used by the evaluation below; kept so the name stays defined.
  clf = svm.SVC(kernel='rbf', C=1000)
  clf.fit(x2, y)

  # Hold out 30% of the samples for testing.
  # NOTE: no random_state is passed, so the split and the scores vary per run.
  X_train, X_test, y_train, y_test = train_test_split(x2, y, test_size=0.30)

  # Train the evaluated RBF classifier on the training part only.
  classy = svm.SVC(kernel='rbf', gamma="auto")
  classy.fit(X_train, y_train)
  y_pred = classy.predict(X_test)

  print(classification_report(y_test, y_pred))
  accuracy = accuracy_score(y_test, y_pred)
  ZPCA = accuracy  # accuracy obtained with PCA and standardization
  print("accuracy = ", accuracy * 100, "%")

  # Use the classes seen during training as explicit labels, so the matrix
  # always has one row/column per class even if the random test split happens
  # to miss a class. The matrix is computed once and shared by both names.
  class_names = classy.classes_
  cnf_matrix = metrics.confusion_matrix(y_test, y_pred, labels=class_names)
  print(cnf_matrix)

  conf_matrix = cnf_matrix
  fig, ax = plt.subplots()
  # Label the heatmap axes through the DataFrame index/columns instead of
  # setting two hard-coded ticks ([0, 1]) before the heatmap is drawn.
  sns.heatmap(
      pd.DataFrame(conf_matrix, index=class_names, columns=class_names),
      annot=True,
      cmap="YlGnBu",
      fmt='g',
  )
  ax.xaxis.set_label_position("top")
  plt.tight_layout()
  plt.title('Matrix pomyłek - z PCA i standaryzacja', y=1.1)
  plt.ylabel('Actual label')
  plt.xlabel('Przewidywania')
  86.  
  87.  
  88.  
  89.  
  90.  
  91.  
  92.  
  93. # -*- coding: utf-8 -*-
  94. """
  95. Created on Tue Jun 4 14:16:36 2019
  96.  
  97. @author: laura
  98. """
  99.  
  100. import pandas as pd
  101. import pylab as pl
  102. import numpy as np
  103. import scipy.optimize as opt
  104. from sklearn import preprocessing
  105. from sklearn.model_selection import train_test_split
  106. import matplotlib.pyplot as plt
  107. from sklearn import svm
  108. from sklearn.metrics import classification_report
  109. from sklearn.metrics import accuracy_score
  110. import seaborn as sns
  111. from sklearn.metrics import confusion_matrix
  112. from sklearn import metrics
  113.  
  # Load the raw iris measurements from a CSV file.
  data = pd.read_csv('Data/iris.csv')

  # The first four columns are used as features, the fifth as the class label.
  x = data.iloc[:, 0:4]
  y = data.iloc[:, 4]
  # Alternative way: select the columns by name.
  #x = data[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
  #y = np.ravel(data[['variety']], order="C")

  # Hold out 30% of the samples for testing.
  # NOTE: no random_state is passed, so the split and the scores vary per run.
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30)

  # The key parameter is an appropriate choice of kernel.
  classy = svm.SVC(kernel='rbf', gamma="scale")
  classy.fit(X_train, y_train)
  y_pred = classy.predict(X_test)

  print(classification_report(y_test, y_pred))
  accuracy = accuracy_score(y_test, y_pred)
  BezPCA = accuracy  # accuracy obtained without PCA and standardization
  print("accuracy = ", accuracy * 100, "%")

  # Use the classes seen during training as explicit labels, so the matrix
  # always has one row/column per class even if the random test split happens
  # to miss a class. The matrix is computed once and shared by both names.
  class_names = classy.classes_
  cnf_matrix = metrics.confusion_matrix(y_test, y_pred, labels=class_names)
  print(cnf_matrix)

  conf_matrix = cnf_matrix
  fig, ax = plt.subplots()
  # Label the heatmap axes through the DataFrame index/columns instead of
  # setting two hard-coded ticks ([0, 1]) before the heatmap is drawn.
  sns.heatmap(
      pd.DataFrame(conf_matrix, index=class_names, columns=class_names),
      annot=True,
      cmap="YlGnBu",
      fmt='g',
  )
  ax.xaxis.set_label_position("top")
  plt.tight_layout()
  plt.title('Matrix pomyłek - bez PCA', y=1.1)
  plt.ylabel('Actual label')
  plt.xlabel('Predicted label')
  plt.show()

  print("Bez PCA i standaryzacji {}".format(BezPCA))
  print("Z PCA i standaryzacja {}".format(ZPCA))


  # A better result is obtained on the split without PCA, because that data
  # set contains more information; our data set after PCA retains more than
  # 90%, but that is still less than 100%.
  160.  
  161.  
  162.  
  163.  
  # 3)

  # Reload iris from scikit-learn and rebind the feature/label names.
  iris = datasets.load_iris()
  x, y = iris.data, iris.target

  # Hold out 30% of the samples for testing.
  X_train, X_test, y_train, y_test = train_test_split(
      x,
      y,
      test_size=0.30,
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement