import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
from utils import plot_classification_dataset, plot_2d_decisionboundary
def predictWithKNN(X, y):
    print("KNN:")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    max_train_score = (0, 0)
    max_test_score = (0, 0)
    # try k from 5 to 15 and keep the best train and test scores
    for k in range(5, 16):
        model = KNeighborsClassifier(n_neighbors=k, algorithm='ball_tree')
        model.fit(X_train, y_train)
        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)
        if train_score > max_train_score[1]:
            max_train_score = (k, train_score)
        if test_score > max_test_score[1]:
            max_test_score = (k, test_score)
    print("MAX Train-Score for k={0}: {1}".format(max_train_score[0], max_train_score[1]))
    print("MAX Test-Score for k={0}: {1}".format(max_test_score[0], max_test_score[1]))
    # continue with the k that scored best on the test split
    k = max_test_score[0]
    model = KNeighborsClassifier(n_neighbors=k, algorithm='ball_tree')
    scores = cross_val_score(model, X, y, cv=5)
    print("Mean: {0:.4f} (+/- {1:.4f})".format(scores.mean(), scores.std() * 2))
def predictWithOneVsRestLogisticRegression(X, y):
    print("\nLogisticRegression OVR:")
    model = LogisticRegression(solver='lbfgs', multi_class='ovr', C=1)
    # 5-fold cross-validation on the full data
    scores = cross_val_score(model, X, y, cv=5)
    print("Mean: {0:.4f} (+/- {1:.4f})".format(scores.mean(), scores.std() * 2))
def predictWithNaiveBayes(X, y):
    print("\nGauss:")
    model = GaussianNB()
    # cross_val_score clones and fits the model itself, so the manual
    # train/test split and fit that were here are not needed
    scores = cross_val_score(model, X, y, cv=5)
    print("Mean: {0:.4f} (+/- {1:.4f})".format(scores.mean(), scores.std() * 2))
if __name__ == "__main__":
    data = np.load('data11_1.npz')
    # use only features 1 to 4; the last feature seems to be noisy and
    # probably not relevant for the classification (see scoreSingleFeatures)
    X, y = data['X'][:, :4], data['y']
    X2 = X[:, 2:4]               # features 3 and 4
    X3 = X[:, 3].reshape(-1, 1)  # feature 4 only; reshape as the models expect 2-D input

    print("\nFeatures 1 to 4:")
    predictWithKNN(X, y)
    predictWithNaiveBayes(X, y)
    predictWithOneVsRestLogisticRegression(X, y)

    print("\nFeatures 3 and 4:")
    predictWithKNN(X2, y)
    predictWithNaiveBayes(X2, y)
    predictWithOneVsRestLogisticRegression(X2, y)

    print("\nFeature 4:")
    predictWithKNN(X3, y)
    predictWithNaiveBayes(X3, y)
    predictWithOneVsRestLogisticRegression(X3, y)