Advertisement
Guest User

Untitled

a guest
Oct 31st, 2014
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.79 KB | None | 0 0
  1. import os
  2. import sys
  3. import numpy as np
  4. import sklearn.cross_validation
  5. import sklearn.neighbors
  6. import matplotlib.pyplot as plt
  7. import matplotlib.patches as mpatches
  8. import sklearn.feature_selection
  9. lib_path = os.path.join(os.path.dirname(__file__),"02450Toolbox_Python","Tools")
  10. sys.path.append(lib_path)
  11. import toolbox_02450
  12. # -*- coding: utf-8 -*-
# Load the dataset (presumably the UCI Spambase file — TODO confirm); np.genfromtxt
# parses the whitespace-separated numeric table and np.mat wraps it as a numpy
# matrix, so all slices below stay 2-D (column vectors, not 1-D arrays).
X = np.mat(np.genfromtxt(os.path.join(os.path.dirname(__file__),"spam.data")))

# Column 57 (the last one) is used as the class label; columns 0..56 are features.
y = X[:,57]
class_names = ["No Spam", "Spam"]
X = X[:,:57]

# Number of samples (len of a matrix is its row count).
N = len(X)
# NOTE(review): this k is dead — the loop below rebinds k; only the literal 5
# passed to KFold actually sets the number of folds.
k = 5
  23. kf = sklearn.cross_validation.KFold(N, 5, shuffle=True, random_state=0)
  24. errors = np.zeros(41)
  25. errors2 = np.zeros(41)
  26. errors3 = np.zeros(41)
  27. for k in range(0,40):
  28.     for train_index, test_index in kf:
  29.         error_sum = 0
  30.         error2_sum = 0
  31.         error3_sum = 0
  32.         X_train, X_test = X[train_index], X[test_index]
  33.         y_train, y_test = y[train_index], y[test_index]
  34.         internal_cross_validation = 10
  35.         neigh = sklearn.neighbors.KNeighborsClassifier(n_neighbors=k+1, metric="euclidean")
  36.         neigh.fit(X_train, y_train.A.ravel())
  37.         y_pred = neigh.predict(X_test)
  38.         y_true = y_test
  39.         error_sum += np.sum(y_pred.ravel()!=y_test.ravel(),dtype=float)/y_test.shape[0]
  40.  
  41.         neigh = sklearn.neighbors.KNeighborsClassifier(n_neighbors=k+1, metric="euclidean")
  42.         kbest = sklearn.feature_selection.SelectKBest(sklearn.feature_selection.chi2, 20)
  43.         kbest.fit(X_train, y_train.A.ravel())
  44.         X_train2 = kbest.transform(X_train)
  45.         X_test2 = kbest.transform(X_test)
  46.         neigh.fit(X_train2, y_train.A.ravel())
  47.         y_pred = neigh.predict(X_test2)
  48.         y_true = y_test
  49.         error2_sum += np.sum(y_pred.ravel()!=y_test.ravel(),dtype=float)/y_test.shape[0]
  50.  
  51.         neigh = sklearn.neighbors.KNeighborsClassifier(n_neighbors=k+1, metric="euclidean")
  52.         kbest = sklearn.feature_selection.SelectKBest(sklearn.feature_selection.chi2, 40)
  53.         kbest.fit(X_train, y_train.A.ravel())
  54.         X_train3 = kbest.transform(X_train)
  55.         X_test3 = kbest.transform(X_test)
  56.         neigh.fit(X_train3, y_train.A.ravel())
  57.         y_pred = neigh.predict(X_test3)
  58.         y_true = y_test
  59.         error3_sum += np.sum(y_pred.ravel()!=y_test.ravel(),dtype=float)/y_test.shape[0]
  60.  
  61.     errors[k+1] = error_sum/len(kf)
  62.     errors2[k+1] = error2_sum/len(kf)
  63.     errors3[k+1] = error3_sum/len(kf)
  64. errors = errors[1::]
  65. errors2 = errors2[1::]
  66. errors3 = errors3[1::]
  67. plt.xlim(1,40)
  68. plt.plot(range(1,41),errors*100,color="blue", label="All features")
  69. plt.plot(range(1,41),errors2*100,color="red", label="20 best, chi2")
  70. plt.plot(range(1,41),errors3*100,color="green", label="40 best, chi2")
  71. plt.legend()
  72. plt.xlabel("k neighbors")
  73. plt.ylabel("mean classification error rate %")
  74. plt.title("K-Neighbors (euclidean)")
  75. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement