Guest User

Untitled

a guest
Feb 15th, 2019
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.11 KB | None | 0 0
from __future__ import print_function

from timeit import default_timer as timer

import numpy as np
from skimage import exposure
from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
  9. # load the MNIST digits dataset
  10. mnist = datasets.load_digits()
  11.  
  12. # Training and testing split,
  13. # 75% for training and 25% for testing
  14. (trainData, testData, trainLabels, testLabels) = train_test_split(np.array(mnist.data), mnist.target, test_size=0.25, random_state=42)
  15.  
  16. # take 10% of the training data and use that for validation
  17. (trainData, valData, trainLabels, valLabels) = train_test_split(trainData, trainLabels, test_size=0.1, random_state=84)
  18.  
  19. # Checking sizes of each data split
  20. print("training data points: {}".format(len(trainLabels)))
  21. print("validation data points: {}".format(len(valLabels)))
  22. print("testing data points: {}".format(len(testLabels)))
  23.  
  24.  
  25. # initialize the values of k for our k-Nearest Neighbor classifier along with the
  26. # list of accuracies for each value of k
  27. kVals = range(1, 30, 2)
  28. accuracies = []
  29.  
  30. # loop over kVals
  31. t0 = timer()
  32. for k in range(1, 30, 2):
  33. # train the classifier with the current value of `k`
  34. model = KNeighborsClassifier(n_neighbors=k)
  35. model.fit(trainData, trainLabels)
  36.  
  37. # evaluate the model and print the accuracies list
  38. score = model.score(valData, valLabels)
  39. print("k=%d, accuracy=%.2f%%" % (k, score * 100))
  40. accuracies.append(score)
  41.  
  42. # largest accuracy
  43. # np.argmax returns the indices of the maximum values along an axis
  44. i = np.argmax(accuracies)
  45. print("k=%d achieved highest accuracy of %.2f%% on validation data" % (kVals[i],
  46. accuracies[i] * 100))
  47.  
  48.  
  49. # Now that I know the best value of k, re-train the classifier
  50. model = KNeighborsClassifier(n_neighbors=kVals[i])
  51. model.fit(trainData, trainLabels)
  52. t1 = timer()
  53. # Predict labels for the test set
  54. predictions = model.predict(testData)
  55.  
  56. # Evaluate performance of model for each of the digits
  57. print("EVALUATION ON TESTING DATA")
  58. print(classification_report(testLabels, predictions))
  59. print ("Execution time:"+str(t1-t0)+"s")
Add Comment
Please, Sign In to add comment