Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- """
- Created on Tue Jun 4 12:23:21 2019
- @author: lancernik
- """
- import numpy as np
- import pandas as pd
- from sklearn.decomposition import PCA
- from sklearn import datasets
- from sklearn.preprocessing import StandardScaler
# c) PCA
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Standardize the features — PCA is sensitive to the scale of each column.
X = StandardScaler().fit_transform(X)

# PCA: project the standardized data onto the first two principal components.
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(X)
x2 = pd.DataFrame(data=principalComponents, columns=['1', '2'])

# Accumulate explained variance until it exceeds 90%.
# NOTE(fix): the original looped over range(4), but the fitted PCA exposes
# only n_components=2 ratios, so indexing explained_variance_ratio_[2]
# raises IndexError whenever the first two components explain <= 90% of the
# variance. Iterating over the ratios themselves is always safe, and the
# redundant np.sum() wrapper around a scalar addition is dropped.
total_variance = 0
for ratio in pca.explained_variance_ratio_:
    total_variance += ratio
    if total_variance > 0.90:
        break
print(total_variance)
# SVM after dimensionality reduction
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn import metrics
import seaborn as sns

# NOTE(fix): the original also fitted an extra SVC(kernel='rbf', C=1000) on
# the *whole* dataset before splitting; that classifier was never used again,
# so the redundant (and train/test-leaking) training step has been removed.

# Hold out 30% of the PCA-reduced data for evaluation.
X_train, X_test, y_train, y_test = train_test_split(x2, y, test_size=0.30)

classy = svm.SVC(kernel='rbf', gamma="auto")
classy.fit(X_train, y_train)
y_pred = classy.predict(X_test)

print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
ZPCA = accuracy  # kept for the final with/without-PCA comparison below
print("accuracy = ", accuracy * 100, "%")

cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
print(cnf_matrix)
# Confusion-matrix heatmap for the model trained on the PCA-reduced data.
conf_matrix = metrics.confusion_matrix(y_test, y_pred)

# NOTE(fix): iris has three classes, but the original hard-coded only two
# tick labels ([0, 1]); derive the labels from the matrix size instead so
# every row/column of the heatmap is labelled.
class_names = list(range(conf_matrix.shape[0]))
fig, ax = plt.subplots()
tick_marks = class_names
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
sns.heatmap(pd.DataFrame(conf_matrix), annot=True, cmap="YlGnBu", fmt='g')
ax.xaxis.set_label_position("top")
plt.tight_layout()
plt.title('Matrix pomyłek - z PCA i standaryzacja', y=1.1)
plt.ylabel('Actual label')
plt.xlabel('Przewidywania')
- # -*- coding: utf-8 -*-
- """
- Created on Tue Jun 4 14:16:36 2019
- @author: laura
- """
- import pandas as pd
- import pylab as pl
- import numpy as np
- import scipy.optimize as opt
- from sklearn import preprocessing
- from sklearn.model_selection import train_test_split
- import matplotlib.pyplot as plt
- from sklearn import svm
- from sklearn.metrics import classification_report
- from sklearn.metrics import accuracy_score
- import seaborn as sns
- from sklearn.metrics import confusion_matrix
- from sklearn import metrics
# Load the raw iris data: the first four columns are the features and the
# fifth column is the class label.
data = pd.read_csv('Data/iris.csv')
x = data.iloc[:, :4]
y = data.iloc[:, 4]

# Equivalent column-name based selection:
#   x = data[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
#   y = np.ravel(data[['variety']], order="C")

# 30% hold-out split, then an RBF-kernel SVM on the raw (unreduced)
# features. Picking a suitable kernel is the key hyper-parameter here.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30)
classy = svm.SVC(kernel='rbf', gamma="scale")
classy.fit(X_train, y_train)
y_pred = classy.predict(X_test)

# Report per-class metrics and overall accuracy on the hold-out set.
print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
BezPCA = accuracy  # kept for the final with/without-PCA comparison below
print("accuracy = ", accuracy * 100, "%")

cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
print(cnf_matrix)
# Confusion-matrix heatmap for the model trained on the raw features.
conf_matrix = metrics.confusion_matrix(y_test, y_pred)

# NOTE(fix): iris has three classes; the original hard-coded only two tick
# labels ([0, 1]). Derive them from the matrix size so every row/column of
# the heatmap is labelled.
class_names = list(range(conf_matrix.shape[0]))
fig, ax = plt.subplots()
tick_marks = class_names
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
sns.heatmap(pd.DataFrame(conf_matrix), annot=True, cmap="YlGnBu", fmt='g')
ax.xaxis.set_label_position("top")
plt.tight_layout()
plt.title('Matrix pomyłek - bez PCA', y=1.1)
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
plt.show()
# Compare hold-out accuracy with and without the PCA preprocessing step.
print("Bez PCA i standaryzacji {}".format(BezPCA))
print("Z PCA i standaryzacja {}".format(ZPCA))
# The split without PCA scores better here, because that dataset carries
# more information: the PCA projection retains more than 90% of the
# variance, but that is still less than 100%.
# 3)
# Reload the clean iris data and make a fresh 70/30 train/test split for
# exercise 3. NOTE(review): `datasets` comes from the `from sklearn import
# datasets` at the top of the file; this section appears to continue past
# the end of the pasted code.
iris = datasets.load_iris()
x = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.30)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement