Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from scipy.io import arff
- import numpy as np
- import matplotlib.pyplot as plt
- import sklearn
- from sklearn import metrics as m
- from sklearn.cross_validation import ShuffleSplit
- #klasyfikatory
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.naive_bayes import MultinomialNB, GaussianNB
- from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
- #funkcja do wczytania danych
def load_data(path):
    """Load an ARFF dataset and split it into features and labels.

    Parameters
    ----------
    path : str
        Path to the .arff file.

    Returns
    -------
    x : np.ndarray, shape (n_samples, n_attributes - 1)
        All attributes except the last one, as floats.
    d : np.chararray or np.ndarray, shape (n_samples,)
        The last attribute (the class): byte strings when the ARFF type
        is "nominal", floats otherwise.
    meta : scipy.io.arff MetaData
        Attribute names/types as parsed by scipy.
    """
    data, meta = arff.loadarff(path)
    n_rows = len(data)
    n_cols = len(data[0])
    x = np.zeros((n_rows, n_cols - 1))
    # The class column is the last attribute; pick a matching container.
    if meta.types()[-1] == "nominal":
        d = np.chararray(n_rows, itemsize=20)
    else:
        d = np.zeros(n_rows)
    for row in range(n_rows):
        # Copy the feature columns; the label is assigned once per row
        # (the original re-assigned it inside the column loop and ran an
        # O(n) `col in range(...)` membership test for every cell).
        for col in range(n_cols - 1):
            x[row, col] = data[row][col]
        d[row] = data[row][-1]
    return x, d, meta
- #funkcja wyswietla w konsoli informacje o klasyfikatorze
def print_classifier_stats(name, conf_matrix):
    """Print a classifier's confusion matrix and derived quality metrics.

    Parameters
    ----------
    name : str
        Display name of the classifier.
    conf_matrix : 2x2 indexable (e.g. sklearn confusion matrix)
        Laid out sklearn-style: [[TN, FP], [FN, TP]].

    Metric names are printed in Polish: dokladnosc = accuracy,
    blad = error rate, czulosc = sensitivity/recall,
    specyficznosc = specificity, precyzja = precision,
    zbalansowana dokladnosc = balanced accuracy.

    Raises ZeroDivisionError if a metric's denominator is zero
    (e.g. no positive samples in the test split).
    """
    print('----------------------------')
    print(name)
    print(conf_matrix)
    TN = float(conf_matrix[0][0])
    FP = float(conf_matrix[0][1])
    FN = float(conf_matrix[1][0])
    TP = float(conf_matrix[1][1])
    dokl = (TN + TP) / (TN + FP + FN + TP)  # accuracy
    blad = 1 - dokl                         # error rate
    czul = TP / (FN + TP)                   # recall / sensitivity
    spec = TN / (TN + FP)                   # specificity
    # BUG FIX: precision is TP / (TP + FP). The original divided by
    # (TP + FN), which is the recall formula, so "precyzja" always
    # duplicated "czulosc" and the printed F1 was wrong as well.
    prec = TP / (TP + FP)
    F1 = (2 * czul * prec) / (czul + prec)
    zbal = (czul + spec) / 2                # balanced accuracy
    print("dokladnosc : " + str(dokl))
    print("blad : " + str(blad))
    print("czulosc : " + str(czul))
    print("specyficznosc : " + str(spec))
    print("precyzja : " + str(prec))
    print("F1 : " + str(F1))
    print("zbalansowana dokladnosc : " + str(zbal))
    print('----------------------------')
- #wczytuje dane
# Load the dataset: features x, labels d, ARFF metadata.
x, d, meta = load_data('Dane/diabetes.arff')

# Classifiers under comparison.
classifiers = [
    KNeighborsClassifier(3),
    MultinomialNB(),
    GaussianNB(),
    BaggingClassifier(),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier()
]

# Display names, kept in the same order as `classifiers`.
classifiers_n = [
    "KNeighbors Classifier",
    "MultinomialNB Classifier",  # fixed typo: was "MultinomialNC"
    "GaussianNB Classifier",
    "Bagging Classifier",        # fixed typo: was "Baggibg"
    "Random Forest Classifier",
    "AdaBoost Classifier"
]

# One random 50/50 train/test split (old iterable-ShuffleSplit API).
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# migrating means importing sklearn.model_selection.ShuffleSplit(
# n_splits=1, test_size=0.5, train_size=0.5) and iterating ss.split(x)
# — TODO confirm the scikit-learn version this project targets.
ss = ShuffleSplit(len(x), 1, 0.5, 0.5)

# Main loop: train, evaluate, and add a ROC curve for every classifier.
plt.figure()
for idx, classifier in enumerate(classifiers):  # renamed: `id` shadowed the builtin
    for train_id, test_id in ss:
        classifier.fit(x[train_id][:], d[train_id])
        y = classifier.predict(x[test_id][:])
        conf_matrix = m.confusion_matrix(d[test_id], y)
        # Print the confusion matrix and derived metrics.
        print_classifier_stats(classifiers_n[idx], conf_matrix)
        # ROC curve from the predicted probability of the positive class.
        preds = classifier.predict_proba(x[test_id])[:, 1]
        r_fpr, r_tpr, _ = m.roc_curve(d[test_id], preds)
        plt.subplot()
        plt.plot(r_fpr, r_tpr, label=classifiers_n[idx])
plt.title("POROWNANIE KLASYFIKATOROW - KRZYWA ROC")
plt.xlabel('FPR')
plt.ylabel('TPR')
# BUG FIX: labels were passed to plt.plot but never displayed because
# plt.legend() was never called.
plt.legend(loc='lower right')
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement