import numpy as np
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
import re
import matplotlib.pyplot as plt

option = 2  # 0: error vs gamma, 1: error vs training size, 2: SVM vs Gaussian Naive Bayes

# Each line of the .dat files appears to be "<label> <index>:<value> <index>:<value> ...";
# strip the label and keep only the feature values as a list of floats.
# (The error computations below assume 5000 examples per class.)
positive = [list(map(float, re.split(r' [0-9]+:', line[1:-2])[1:])) for line in open("positive.dat").readlines()]
negative = [list(map(float, re.split(r' [0-9]+:', line[1:-2])[1:])) for line in open("negative.dat").readlines()]

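# Hedged alternative: the "<index>:<value>" tokens handled by the regex above suggest the
# .dat files may be in svmlight/LIBSVM format. If that assumption holds, sklearn can parse
# them directly. This helper is only a sketch and is not used below.
def load_svmlight_dense(path):
    from sklearn.datasets import load_svmlight_file
    X, _ = load_svmlight_file(path)  # labels stored in the file are ignored here
    return X.toarray().tolist()      # dense rows, comparable to the lists built above
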
if option == 0:
    # Sweep gamma for several training-set sizes and plot the held-out error curve for each size.
    sizes = [1000, 2000, 3000, 4000, 5000]
    colors = ['b-', 'g-', 'r-', 'c-', 'm-']
    index = 0

    for s in sizes:
        SIZE = s

        # The first SIZE/2 examples of each class form the training set.
        X = np.concatenate((positive[0:SIZE // 2], negative[0:SIZE // 2]))
        y = np.concatenate((np.full((SIZE // 2, 1), 1), np.full((SIZE // 2, 1), -1))).ravel()

        gammaSequence = np.arange(0.000001, 0.0005, 0.00002)

        errors = []

        for g in gammaSequence:
            clf = SVC(kernel='poly', gamma=g)
            clf.fit(X, y)

            # Error rate on the remaining examples of both classes:
            # misclassified positives plus misclassified negatives over the held-out total.
            nTest = 5000 - SIZE // 2
            error = (list(clf.predict(positive[SIZE // 2:])).count(-1)
                     + list(clf.predict(negative[SIZE // 2:])).count(1)) / (2.0 * nTest)
            errors.append(error)

        plt.plot(gammaSequence, errors, colors[index],
                 label=str(s) + ' min: ' + str(min(errors))
                       + ' at gamma: ' + str(gammaSequence[np.argmin(errors)]))
        index = index + 1

    plt.xlabel('Gamma'); plt.ylabel('Error'); plt.title('Error vs training set size and gamma')
    plt.legend()
    plt.show()
elif option == 1:
    # Fix gamma and plot the held-out error as a function of training-set size.
    sizes = np.arange(1000, 6000, 1000)
    errors = []

    for s in sizes:
        SIZE = s

        X = np.concatenate((positive[0:SIZE // 2], negative[0:SIZE // 2]))
        y = np.concatenate((np.full((SIZE // 2, 1), 1), np.full((SIZE // 2, 1), -1))).ravel()

        clf = SVC(kernel='poly', gamma=0.0001)
        clf.fit(X, y)

        nTest = 5000 - SIZE // 2
        error = (list(clf.predict(positive[SIZE // 2:])).count(-1)
                 + list(clf.predict(negative[SIZE // 2:])).count(1)) / (2.0 * nTest)
        errors.append(error)

    plt.plot(sizes, errors, 'b-')
    plt.plot([1000, 5000], [min(errors), min(errors)], 'r--',
             label="Asymptotic Error: " + str(min(errors)))
    plt.xlabel('Training Size'); plt.ylabel('Error'); plt.title('Error vs training set size at gamma = 0.0001')
    plt.legend()
    plt.show()
elif option == 2:
    # Compare the polynomial-kernel SVM against Gaussian Naive Bayes across training sizes.
    sizes = np.arange(1000, 6000, 1000)
    errorsSVM = []
    errorsNB = []

    for s in sizes:
        SIZE = s

        X = np.concatenate((positive[0:SIZE // 2], negative[0:SIZE // 2]))
        y = np.concatenate((np.full((SIZE // 2, 1), 1), np.full((SIZE // 2, 1), -1))).ravel()

        nTest = 5000 - SIZE // 2

        clfSVM = SVC(kernel='poly', gamma=0.0001)
        clfSVM.fit(X, y)
        error = (list(clfSVM.predict(positive[SIZE // 2:])).count(-1)
                 + list(clfSVM.predict(negative[SIZE // 2:])).count(1)) / (2.0 * nTest)
        errorsSVM.append(error)

        clfNB = GaussianNB()
        clfNB.fit(X, y)
        error = (list(clfNB.predict(positive[SIZE // 2:])).count(-1)
                 + list(clfNB.predict(negative[SIZE // 2:])).count(1)) / (2.0 * nTest)
        errorsNB.append(error)

    plt.plot(sizes, errorsSVM, 'b-', label="SVM")
    plt.plot(sizes, errorsNB, 'r-', label="NB")
    plt.xlabel('Training Size'); plt.ylabel('Error'); plt.title('Error vs training set size, Poly SVM at gamma = 0.0001 and Gaussian NB')
    plt.legend()
    plt.show()
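
# Sketch of a reusable helper, assuming the held-out set is simply everything past the
# training split of each class. It computes the same metric as the inline expressions
# above (misclassified positives plus misclassified negatives over the total number of
# held-out examples); it is not called anywhere and is shown only to make the metric explicit.
def heldout_error(clf, positive, negative, size, perClass=5000):
    nTest = perClass - size // 2
    falseNeg = list(clf.predict(positive[size // 2:])).count(-1)  # positives predicted -1
    falsePos = list(clf.predict(negative[size // 2:])).count(1)   # negatives predicted +1
    return (falseNeg + falsePos) / (2.0 * nTest)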