# SVM / Gaussian Naive Bayes error-curve experiments (Pastebin boilerplate removed).
import re

import matplotlib.pyplot as plt
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# ---------------------------------------------------------------------------
# Error-curve experiments: polynomial-kernel SVM and Gaussian Naive Bayes.
#
# Expects two data files, ``positive.dat`` and ``negative.dat``, with one
# example per line in a libSVM-like format: a label character followed by
# " index:value" pairs.  Depending on ``option`` the script plots:
#   0 - test error vs. the SVM ``gamma`` parameter, one curve per train size
#   1 - test error vs. training-set size at a fixed gamma
#   2 - polynomial SVM vs. Gaussian Naive Bayes across training sizes
# ---------------------------------------------------------------------------

option = 2  # 0: error vs gamma, 1: error vs size, 2: bayesian


def parse_line(line):
    """Turn one raw data line into a list of float feature values.

    ``line[1:-2]`` drops the leading label character and the two trailing
    characters (space + newline); the remainder splits on " index:" markers.
    """
    return [float(tok) for tok in re.split(r' [0-9]+\:', line[1:-2])[1:]]


def load_vectors(path):
    """Read every example in *path* as a list of float feature vectors."""
    # ``with`` closes the handle (the original leaked open files).
    with open(path) as handle:
        return [parse_line(line) for line in handle]


def error_rate(clf, pos_test, neg_test):
    """Return the fraction of held-out examples *clf* misclassifies.

    Positives are labelled +1 and negatives -1, so a -1 prediction on a
    positive example is a false negative and a +1 prediction on a negative
    example is a false positive.

    NOTE: the original computed ``(FN / n + FP) / n`` -- a misplaced
    parenthesis that effectively reported only the false-positive rate.
    """
    false_neg = int(np.sum(clf.predict(pos_test) == -1))
    false_pos = int(np.sum(clf.predict(neg_test) == 1))
    return (false_neg + false_pos) / float(len(pos_test) + len(neg_test))


def make_split(positive, negative, size):
    """Use the first ``size // 2`` examples of each class for training.

    Returns ``(X, y, pos_test, neg_test)``; the test sets are the remaining
    examples of each class.
    """
    half = size // 2  # explicit integer division (Python 2 relied on ``/``)
    X = np.concatenate((positive[:half], negative[:half]))
    y = np.concatenate((np.full(half, 1), np.full(half, -1)))
    return X, y, positive[half:], negative[half:]


def main():
    positive = load_vectors("positive.dat")
    negative = load_vectors("negative.dat")

    if option == 0:
        # One error-vs-gamma curve per training-set size.
        sizes = [1000, 2000, 3000, 4000, 5000]
        colors = ['b-', 'g-', 'r-', 'c-', 'm-']
        gammas = np.arange(0.000001, 0.0005, 0.00002)
        for size, color in zip(sizes, colors):
            X, y, pos_test, neg_test = make_split(positive, negative, size)
            errors = []
            for gamma in gammas:
                clf = SVC(kernel='poly', gamma=gamma)
                clf.fit(X, y)
                errors.append(error_rate(clf, pos_test, neg_test))
            plt.plot(gammas, errors, color,
                     label=str(size) + ' min: ' + str(min(errors))
                           + ' at gamma: ' + str(gammas[np.argmin(errors)]))
        plt.xlabel('Gamma')
        plt.ylabel('Error')
        plt.title('Error vs training set size and gamma')
        plt.legend()
        plt.show()
    elif option == 1:
        # Error vs training-set size at a fixed gamma.
        sizes = np.arange(1000, 6000, 1000)
        errors = []
        for size in sizes:
            X, y, pos_test, neg_test = make_split(positive, negative, size)
            clf = SVC(kernel='poly', gamma=0.0001)
            clf.fit(X, y)
            errors.append(error_rate(clf, pos_test, neg_test))
        plt.plot(sizes, errors, 'b-')
        plt.plot([1000, 5000], [min(errors), min(errors)], 'r--',
                 label="Asymptotic Error: " + str(min(errors)))
        plt.xlabel('Training Size')
        plt.ylabel('Error')
        plt.title('Error vs training set at gamma = 0.0001')
        plt.legend()
        plt.show()
    elif option == 2:
        # Polynomial SVM vs Gaussian Naive Bayes on the same splits.
        sizes = np.arange(1000, 6000, 1000)
        errors_svm = []
        errors_nb = []
        for size in sizes:
            X, y, pos_test, neg_test = make_split(positive, negative, size)
            svm = SVC(kernel='poly', gamma=0.0001)
            svm.fit(X, y)
            errors_svm.append(error_rate(svm, pos_test, neg_test))
            nb = GaussianNB()
            nb.fit(X, y)
            errors_nb.append(error_rate(nb, pos_test, neg_test))
        plt.plot(sizes, errors_svm, 'b-', label="SVM")
        plt.plot(sizes, errors_nb, 'r-', label="NB")
        plt.xlabel('Training Size')
        plt.ylabel('Error')
        plt.title('Error vs training set size, Poly SVM at gamma = 0.0001'
                  ' and Gaussian NB')
        plt.legend()
        plt.show()


if __name__ == "__main__":
    main()