import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
from utils import plot_classification_dataset, plot_2d_decisionboundary
def predictWithKNN(X, y):
    print("KNN:")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    max_train_score = (0, 0)
    max_test_score = (0, 0)
    # try k from 5 to 15 and keep the best train and test scores
    for k in range(5, 16):
        model = KNeighborsClassifier(n_neighbors=k, algorithm='ball_tree')
        model.fit(X_train, y_train)
        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)
        if train_score > max_train_score[1]:
            max_train_score = (k, train_score)
        if test_score > max_test_score[1]:
            max_test_score = (k, test_score)
    print("MAX Train-Score for k={0}: {1}".format(max_train_score[0], max_train_score[1]))
    print("MAX Test-Score for k={0}: {1}".format(max_test_score[0], max_test_score[1]))
    # continue with the k that scored best on the test split
    k = max_test_score[0]
    model = KNeighborsClassifier(n_neighbors=k, algorithm='ball_tree')
    scores = cross_val_score(model, X, y, cv=5)
    print("Mean: {0:.4f} (+/- {1:.4f})".format(scores.mean(), scores.std() * 2))
def predictWithOneVsRestLogisticRegression(X, y):
    print("\nLogisticRegression OVR:")
    model = LogisticRegression(solver='lbfgs', multi_class='ovr', C=1)
    # 5-fold cross-validation on the full data
    scores = cross_val_score(model, X, y, cv=5)
    print("Mean: {0:.4f} (+/- {1:.4f})".format(scores.mean(), scores.std() * 2))
def predictWithNaiveBayes(X, y):
    print("\nGauss:")
    model = GaussianNB()
    # cross_val_score clones and fits the model itself, so the manual
    # train/test split and fit that were here are not needed
    scores = cross_val_score(model, X, y, cv=5)
    print("Mean: {0:.4f} (+/- {1:.4f})".format(scores.mean(), scores.std() * 2))
if __name__ == "__main__":
    data = np.load('data11_1.npz')
    # use only features 1 to 4; the last feature seems to be noisy and
    # probably not relevant for the classification (see scoreSingleFeatures)
    X, y = data['X'][:, :4], data['y']
    X2 = X[:, 2:4]               # features 3 and 4
    X3 = X[:, 3].reshape(-1, 1)  # feature 4 only; reshape as the models expect 2-D input

    print("\nFeatures 1 to 4:")
    predictWithKNN(X, y)
    predictWithNaiveBayes(X, y)
    predictWithOneVsRestLogisticRegression(X, y)

    print("\nFeatures 3 and 4:")
    predictWithKNN(X2, y)
    predictWithNaiveBayes(X2, y)
    predictWithOneVsRestLogisticRegression(X2, y)

    print("\nFeature 4:")
    predictWithKNN(X3, y)
    predictWithNaiveBayes(X3, y)
    predictWithOneVsRestLogisticRegression(X3, y)