Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- from sklearn import svm
- from sklearn.metrics import accuracy_score
- from math import floor
# Load the dataset from disk and print a quick overview plus summary statistics.
alldata = pd.read_csv("./alldata.txt")
for header, obj in (("alldata = ", alldata), ("alldata summary: ", alldata.describe())):
    print(header)
    print(obj)
    print()
# 75/25 train/test split, taking rows in file order (no shuffling).
# NOTE(review): assumes the default RangeIndex from read_csv — same assumption
# the label-based slicing made; positional iloc makes it explicit.
N = len(alldata)
stop = floor(0.75 * N)
train_part = alldata.iloc[:stop]
test_part = alldata.iloc[stop:]
xtrain = train_part[["X1", "X2"]]
ytrain = train_part["y"]
xtest = test_part[["X1", "X2"]]
ytest = test_part["y"]
# Display Data: scatter plot of the two classes in feature space.
plt.figure()
plt.scatter(alldata[alldata.y == 1].X1, alldata[alldata.y == 1].X2,
            color="blue", marker="o", label="1")
# BUG FIX: the class-2 points were also labelled "1", producing a legend
# with two "1" entries; label them "2".
plt.scatter(alldata[alldata.y == 2].X1, alldata[alldata.y == 2].X2,
            color="red", marker="+", label="2")
plt.title("Data Points")
plt.xlabel("X1")
plt.ylabel("X2")
plt.legend()
plt.show()
# SVM Classifier: plot the data points and overlay the RBF-SVM decision
# boundary (for gamma=1) as a contour over a dense grid.
plt.figure()
plt.scatter(alldata[alldata.y == 1].X1, alldata[alldata.y == 1].X2,
            color="blue", marker="o", label="1")
# BUG FIX: class-2 scatter was labelled "1"; use "2" so the legend is correct.
plt.scatter(alldata[alldata.y == 2].X1, alldata[alldata.y == 2].X2,
            color="red", marker="+", label="2")
plt.title("SVM Classification")

# Grid for hyperplanes: dense mesh spanning the training-data range.
X1 = np.arange(xtrain.X1.min(), xtrain.X1.max(), 0.01)
X2 = np.arange(xtrain.X2.min(), xtrain.X2.max(), 0.01)
xx, yy = np.meshgrid(X1, X2)

# Create the SVM classifier and apply it to the grid points (gamma = 1).
clf = svm.SVC(kernel="rbf", gamma=1)
clf = clf.fit(xtrain, ytrain)
pred = clf.predict(np.c_[xx.ravel(), yy.ravel()])
pred = pred.reshape(xx.shape)
plt.contour(xx, yy, pred, colors="blue")

# Disabled experiments (previously hidden inside a fragile ''''...''' string
# literal); uncomment to overlay additional boundaries:
# # Gamma = 0.01
# clf = svm.SVC(kernel="rbf", gamma=0.01).fit(xtrain, ytrain)
# pred = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# plt.contour(xx, yy, pred, colors="red")
# # Gamma = 100
# clf = svm.SVC(kernel="rbf", gamma=100).fit(xtrain, ytrain)
# pred = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# plt.contour(xx, yy, pred, colors="green")

plt.show()
# Train one RBF-SVM per gamma and record training/testing error (1 - accuracy).
gammavalues = [10**i for i in range(-2, 6)]
trainingError = []
testingError = []
for g in gammavalues:
    model = svm.SVC(kernel="rbf", gamma=g).fit(xtrain, ytrain)
    trainingError.append(1 - accuracy_score(ytrain, model.predict(xtrain)))
    testingError.append(1 - accuracy_score(ytest, model.predict(xtest)))

# Plot both error curves against the gamma grid.
plt.figure()
plt.plot(trainingError, c="blue")
plt.plot(testingError, c="red")
plt.ylim(0, 0.5)
plt.xticks(range(len(gammavalues)), gammavalues)
plt.legend(["Training Error", "Testing Error"])
plt.xlabel("Gamma")
plt.ylabel("Error")
plt.show()
# Find the best gamma via 10-fold cross-validation on the training set:
# for each candidate, average the fold accuracies and keep the argmax.
from sklearn.model_selection import cross_val_score

accuracies = [
    cross_val_score(svm.SVC(kernel="rbf", gamma=g), xtrain, ytrain, cv=10).mean()
    for g in gammavalues
]
print(accuracies)
print("Best gamma: ", gammavalues[np.argmax(accuracies)])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement