Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
from scipy import optimize
def read_data(dataset='learn.txt', max_lines=150):
    """Load a whitespace-separated dataset of floats.

    Per line: column 0 is skipped (presumably a row id — TODO confirm
    against the data files), column 1 is the label, columns 2+ are the
    features.

    Args:
        dataset: path to the data file.
        max_lines: cap on rows read. The original checked ``lines > 150``
            after the increment and therefore read 151 rows — an
            off-by-one; the cap is now exact.

    Returns:
        (x, y): features as an (m, cols) array and labels as (m, 1).
    """
    features = []
    labels = []
    rows = 0
    cols = 0
    with open(dataset) as f:
        for line in f:
            values = [float(v) for v in line.split()]
            if not values:
                continue  # skip blank lines (original raised IndexError)
            labels.append(values[1])
            row = values[2:]
            features.extend(row)
            cols = len(row)
            rows += 1
            if rows >= max_lines:
                break
    return np.reshape(features, (rows, cols)), np.reshape(labels, (rows, 1))
def target(theta, x, y, sign=1.0):
    """SVM dual objective, optionally negated.

    Computes sign * (sum_i theta_i - 1/2 * sum_{i,j} theta_i theta_j
    y_i y_j <x_i, x_j>).  Called with sign=-1.0 so that minimizing this
    maximizes the dual.

    Fix: the original assigned ``res = ...`` inside the double loop,
    overwriting the accumulator and keeping only the (m-1, m-1) term;
    ``jacobian`` accumulates the matching sum, confirming ``+=`` is the
    intended behavior.
    """
    m = len(y)
    res = 0.0
    for i in range(m):
        for j in range(m):
            res += theta[i] * theta[j] * y[i] * y[j] * (np.dot(x[i].T, x[j]))
    return sign * (np.sum(theta) - res / 2.0)
def jacobian(theta, x, y, sign=1.0):
    """Gradient of the dual objective w.r.t. theta.

    Component i is sign * (1 - sum_j theta_j y_i y_j <x_i, x_j>);
    samples with label 0 contribute a constant partial of 1.
    """
    m = len(y)
    grad = []
    for i in range(m):
        partial = 1.0
        if y[i] != 0:
            for j in range(m):
                partial -= theta[j] * y[i] * y[j] * np.dot(x[i].T, x[j])
        grad.append(sign * partial)
    return np.array(grad)
def constrains(y, c):
    """SLSQP equality constraint for the dual: sum_i theta_i y_i == 0.

    The constraint jacobian of theta·y is simply y.T.  ``c`` is accepted
    for signature symmetry with ``bounds`` but unused here.
    """
    return {
        'type': 'eq',
        'fun': lambda theta: np.dot(theta.T, y),
        'jac': lambda theta: y.T,
    }
def bounds(m, c):
    """Box constraints 0 <= theta_i <= c for each of the m multipliers."""
    return tuple((0, c) for _ in range(m))
def optimize_dual(x, y, c):
    """Maximize the SVM dual with SLSQP (by minimizing its negation).

    Returns the multipliers theta reshaped to an (m, 1) column.
    """
    m = len(y)
    print("Optimization begin")
    result = optimize.minimize(
        target,
        np.zeros((m, 1)),
        args=(x, y, -1.0),  # sign=-1.0 turns the maximization into a minimization
        method='SLSQP',
        jac=jacobian,
        bounds=bounds(m, c),
        constraints=constrains(y, c),
        options={'disp': True},
    )
    return np.reshape(result.x, (len(result.x), 1))
def solve_straight(theta, x, y):
    """Recover the primal solution (intercept, weights) from the dual.

    w = sum_i theta_i y_i x_i; the intercept comes from the first
    sample: b0 = y[0] - w·x[0].

    Fix: the original computed ``b0 = w·x0 - y0`` — the negation of the
    standard intercept — flipping the decision function's offset.
    NOTE(review): strictly, b0 should be taken from a sample with
    0 < theta_i < c (an on-margin support vector); using x[0]
    unconditionally is an assumption to confirm.

    Returns:
        (n+1, 1) column: intercept stacked on top of the weights.
    """
    m = len(y)
    w = np.zeros((1, x.shape[1]))
    for i in range(m):
        w += theta[i] * y[i] * x[i]
    b0 = y[0] - np.dot(w, x[0].T)  # was np.dot(...) - y[0]: wrong sign
    return np.vstack((b0, w.T))
def svm(x, y, c):
    """Train a binary SVM: solve the dual, then recover (intercept, weights)."""
    multipliers = optimize_dual(x, y, c)
    return solve_straight(multipliers, x, y)
def svm_hypothesis(beta, x):
    """Sign of the decision value x·beta: +1.0 if strictly positive, else -1.0."""
    return 1.0 if np.dot(x, beta) > 0 else -1.0
# one vs all
def multi_svm(x, y, c):
    """Train one one-vs-all binary SVM per class; returns a (classes, n) matrix.

    Fix: samples outside class i were labelled 0.0, but the dual needs
    labels in {-1, +1} — with y_j == 0 every term of that sample vanishes
    from the objective and the equality constraint (``jacobian`` even
    special-cases it), so the "rest" never acted as negative examples.
    They are labelled -1.0 now.

    NOTE(review): svm() returns x.shape[1] + 1 coefficients (intercept
    plus one weight per column); only the first x.shape[1] are copied,
    exactly as the original did — confirm whether dropping the trailing
    weight is intentional.
    """
    cnt_classes = count_classes(y)
    betas = np.zeros((cnt_classes, x.shape[1]))
    for i in range(cnt_classes):
        yt = np.zeros(y.shape)
        for j in range(len(y)):
            yt[j] = 1.0 if int(y[j]) == i else -1.0  # was 0.0 — see docstring
        print("Left: ", cnt_classes - i)
        beta = svm(x, yt, c)
        for j in range(len(betas[i])):
            betas[i][j] = beta[j]
    return betas
def predict_multi_svm(x, betas):
    """Predict a class per row of x from the one-vs-all coefficient matrix.

    Fix: the original assigned the LAST class whose sign-hypothesis was
    non-negative (and defaulted to class 0 when none fired), which is
    arbitrary whenever several one-vs-all classifiers claim a sample.
    Standard one-vs-all picks the class with the largest raw decision
    value x[i]·betas[c], so argmax over the margins is used instead.

    Returns:
        1-D float array of length x.shape[0] holding class indices.
    """
    num = x.shape[0]
    p = np.zeros(num)
    for i in range(num):
        scores = np.dot(betas, x[i])  # one decision value per class
        p[i] = int(np.argmax(scores))
    return p
def count_classes(y):
    """Number of classes, assuming labels are 0..max (TODO confirm dense labels).

    Fix: the original returned ``max(y) + 1`` — with a float ndarray that
    is a float (or a 1-element array for the (m, 1) labels produced by
    read_data), which breaks ``range(...)`` in multi_svm and
    ``np.zeros(...)`` in cohen.  Cast to a plain int.
    """
    return int(np.max(y)) + 1
def normalize_params(x):
    """Per-column mean and (population) standard deviation of x."""
    return x.mean(axis=0), x.std(axis=0)
def normalize(x, means, stds):
    """Standardize x column-by-column, in place.

    Columns with zero std are left untouched (the caller drops them
    separately via drop_with_zero_dev).  Returns the mutated x.
    """
    for col in range(x.shape[1]):
        sigma = stds[col]
        if sigma > 0:
            x[:, col] = (x[:, col] - means[col]) / sigma
    return x
def add_ones(x):
    """Prepend a column of ones (bias feature) to x."""
    bias = np.ones((x.shape[0], 1))
    return np.concatenate((bias, x), axis=1)
def drop_with_zero_dev(x, stds):
    """Remove columns of x whose standard deviation is zero (constant features)."""
    zero_cols = [i for i in range(x.shape[1]) if stds[i] == 0]
    if not zero_cols:
        return x  # nothing to drop; keep the original array, as before
    return np.delete(x, zero_cols, 1)
def cohen(p, y):
    """Cohen's kappa agreement between predictions p and true labels y.

    kappa = (pr_a - pr_e) / (1 - pr_e), where pr_a is the observed
    agreement rate and pr_e the chance agreement computed from the two
    marginal class histograms.

    Fix: the histograms were indexed with ``p[i]`` directly — p is
    produced by np.zeros, so its entries are floats, and NumPy rejects
    float indices (IndexError).  Indices are now cast to int.
    NOTE(review): still divides by zero when pr_e == 1 (degenerate
    single-class case) — decide on a sentinel if that can occur.
    """
    cnt = len(p)
    agreement = 0.0
    classes = count_classes(y)
    info_p = np.zeros(classes)
    info_y = np.zeros(classes)
    for i in range(cnt):
        if p[i] == y[i]:
            agreement += 1
        info_p[int(p[i])] += 1  # int(): predictions are stored as floats
        info_y[int(y[i])] += 1
    f_cnt2 = float(cnt) * cnt
    pr_a = agreement / float(cnt)
    pr_e = 0.0
    for i in range(classes):
        pr_e += info_p[i] * info_y[i] / f_cnt2
    return (pr_a - pr_e) / (1 - pr_e)
def summarize_svm(learn_x, learn_y, test_x, test_y):
    """Train one-vs-all SVMs on the learn split and report Cohen's kappa on test."""
    model = multi_svm(learn_x, learn_y, c=1.0)
    preds = predict_multi_svm(test_x, model)
    print("Cohen's kappa: " + str(cohen(preds, test_y)))
# --- Script entry: load data, standardize, train, evaluate. ---
# NOTE(review): runs at import time; consider an `if __name__ == "__main__":` guard.
learn_x, learn_y = read_data()
test_x, test_y = read_data('test.txt')
# Standardize both splits using the LEARN split's statistics only.
means, stds = normalize_params(learn_x)
learn_x = normalize(learn_x, means, stds)
test_x = normalize(test_x, means, stds)
# Constant columns (zero std) were left unscaled by normalize — drop them from both splits.
learn_x = drop_with_zero_dev(learn_x, stds)
test_x = drop_with_zero_dev(test_x, stds)
print('Optimization begin:')
# Bias column is prepended AFTER normalization so it is not centered away.
learn_x = add_ones(learn_x)
test_x = add_ones(test_x)
summarize_svm(learn_x, learn_y, test_x, test_y)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement