Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from __future__ import print_function

from timeit import default_timer as timer

import numpy as np
from skimage import exposure
from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Candidate neighbour counts for the k-NN sweep: the odd values 1, 3, ..., 29.
K_VALUES = range(1, 30, 2)


def evaluate_k_values(k_values, train_data, train_labels, val_data, val_labels):
    """Fit one k-NN classifier per candidate k and score it on validation data.

    Args:
        k_values: Iterable of neighbour counts to try.
        train_data: Feature rows used to fit each classifier.
        train_labels: Labels matching ``train_data``.
        val_data: Feature rows used to score each classifier.
        val_labels: Labels matching ``val_data``.

    Returns:
        A list of validation accuracies, one per entry of ``k_values`` and
        in the same order.
    """
    accuracies = []
    for k in k_values:
        # train the classifier with the current value of `k`
        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(train_data, train_labels)
        # evaluate the model and record its validation accuracy
        score = model.score(val_data, val_labels)
        print("k=%d, accuracy=%.2f%%" % (k, score * 100))
        accuracies.append(score)
    return accuracies


def select_best_k(k_values, accuracies):
    """Return ``(k, accuracy)`` for the best-scoring candidate.

    Args:
        k_values: Indexable sequence of the candidate k values.
        accuracies: Sequence of scores aligned with ``k_values``.

    Returns:
        A ``(k, accuracy)`` tuple. On ties the first maximum wins, because
        ``np.argmax`` returns the first index of the maximum value.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    if len(accuracies) == 0:
        raise ValueError("no accuracies to choose from")
    if len(k_values) != len(accuracies):
        raise ValueError("k_values and accuracies must have the same length")
    best = int(np.argmax(accuracies))
    return k_values[best], accuracies[best]


def main():
    """Tune k for a k-NN digit classifier and report test-set performance."""
    # load scikit-learn's bundled handwritten-digits dataset
    # (this is the small 8x8 digits set, not the full MNIST dataset)
    digits = datasets.load_digits()

    # Training and testing split,
    # 75% for training and 25% for testing
    (train_data, test_data, train_labels, test_labels) = train_test_split(
        np.array(digits.data), digits.target, test_size=0.25, random_state=42)

    # take 10% of the training data and use that for validation
    (train_data, val_data, train_labels, val_labels) = train_test_split(
        train_data, train_labels, test_size=0.1, random_state=84)

    # Checking sizes of each data split
    print("training data points: {}".format(len(train_labels)))
    print("validation data points: {}".format(len(val_labels)))
    print("testing data points: {}".format(len(test_labels)))

    # The timed window covers the k sweep plus the final refit; prediction on
    # the test set happens after t1 and is not included.
    t0 = timer()
    accuracies = evaluate_k_values(
        K_VALUES, train_data, train_labels, val_data, val_labels)

    # pick the k with the largest validation accuracy
    best_k, best_accuracy = select_best_k(K_VALUES, accuracies)
    print("k=%d achieved highest accuracy of %.2f%% on validation data" % (
        best_k, best_accuracy * 100))

    # Now that the best value of k is known, re-train the classifier
    model = KNeighborsClassifier(n_neighbors=best_k)
    model.fit(train_data, train_labels)
    t1 = timer()

    # Predict labels for the test set
    predictions = model.predict(test_data)

    # Evaluate performance of model for each of the digits
    print("EVALUATION ON TESTING DATA")
    print(classification_report(test_labels, predictions))
    print("Execution time:" + str(t1 - t0) + "s")


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment