Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- from sklearn import svm
- from sklearn.metrics import accuracy_score
- from math import floor
# Load the dataset from disk and print a quick overview plus summary statistics.
alldata = pd.read_csv("./alldata.txt")
for header, obj in (("alldata = ", alldata), ("alldata summary: ", alldata.describe())):
    print(header)
    print(obj)
    print()
# 75/25 train/test split, taking rows in file order (no shuffling).
# NOTE(review): assumes the default RangeIndex from read_csv — same assumption
# the label-based slicing made; positional iloc makes it explicit.
N = len(alldata)
stop = floor(0.75 * N)
train_part = alldata.iloc[:stop]
test_part = alldata.iloc[stop:]
xtrain = train_part[["X1", "X2"]]
ytrain = train_part["y"]
xtest = test_part[["X1", "X2"]]
ytest = test_part["y"]
# Display Data: scatter plot of the two classes in feature space.
plt.figure()
plt.scatter(alldata[alldata.y == 1].X1, alldata[alldata.y == 1].X2,
            color="blue", marker="o", label="1")
# BUG FIX: the class-2 points were also labelled "1", producing a legend
# with two "1" entries; label them "2".
plt.scatter(alldata[alldata.y == 2].X1, alldata[alldata.y == 2].X2,
            color="red", marker="+", label="2")
plt.title("Data Points")
plt.xlabel("X1")
plt.ylabel("X2")
plt.legend()
plt.show()
# SVM Classifier: plot the data points and overlay the RBF-SVM decision
# boundary (for gamma=1) as a contour over a dense grid.
plt.figure()
plt.scatter(alldata[alldata.y == 1].X1, alldata[alldata.y == 1].X2,
            color="blue", marker="o", label="1")
# BUG FIX: class-2 scatter was labelled "1"; use "2" so the legend is correct.
plt.scatter(alldata[alldata.y == 2].X1, alldata[alldata.y == 2].X2,
            color="red", marker="+", label="2")
plt.title("SVM Classification")

# Grid for hyperplanes: dense mesh spanning the training-data range.
X1 = np.arange(xtrain.X1.min(), xtrain.X1.max(), 0.01)
X2 = np.arange(xtrain.X2.min(), xtrain.X2.max(), 0.01)
xx, yy = np.meshgrid(X1, X2)

# Create the SVM classifier and apply it to the grid points (gamma = 1).
clf = svm.SVC(kernel="rbf", gamma=1)
clf = clf.fit(xtrain, ytrain)
pred = clf.predict(np.c_[xx.ravel(), yy.ravel()])
pred = pred.reshape(xx.shape)
plt.contour(xx, yy, pred, colors="blue")

# Disabled experiments (previously hidden inside a fragile ''''...''' string
# literal); uncomment to overlay additional boundaries:
# # Gamma = 0.01
# clf = svm.SVC(kernel="rbf", gamma=0.01).fit(xtrain, ytrain)
# pred = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# plt.contour(xx, yy, pred, colors="red")
# # Gamma = 100
# clf = svm.SVC(kernel="rbf", gamma=100).fit(xtrain, ytrain)
# pred = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# plt.contour(xx, yy, pred, colors="green")

plt.show()
# Train one RBF-SVM per gamma and record training/testing error (1 - accuracy).
gammavalues = [10**i for i in range(-2, 6)]
trainingError = []
testingError = []
for g in gammavalues:
    model = svm.SVC(kernel="rbf", gamma=g).fit(xtrain, ytrain)
    trainingError.append(1 - accuracy_score(ytrain, model.predict(xtrain)))
    testingError.append(1 - accuracy_score(ytest, model.predict(xtest)))

# Plot both error curves against the gamma grid.
plt.figure()
plt.plot(trainingError, c="blue")
plt.plot(testingError, c="red")
plt.ylim(0, 0.5)
plt.xticks(range(len(gammavalues)), gammavalues)
plt.legend(["Training Error", "Testing Error"])
plt.xlabel("Gamma")
plt.ylabel("Error")
plt.show()
# Find the best gamma via 10-fold cross-validation on the training set:
# for each candidate, average the fold accuracies and keep the argmax.
from sklearn.model_selection import cross_val_score

accuracies = [
    cross_val_score(svm.SVC(kernel="rbf", gamma=g), xtrain, ytrain, cv=10).mean()
    for g in gammavalues
]
print(accuracies)
print("Best gamma: ", gammavalues[np.argmax(accuracies)])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement