import numpy as np
from scipy import optimize


def read_data(dataset='learn.txt'):
    # Each line: the first field is ignored, the second is the class label,
    # the remaining fields are the feature vector.
    y = []
    x = []
    lines = 0
    cols = 0
    with open(dataset) as f:
        for line in f:
            xy = [float(i) for i in line.split()]
            y.append(xy[1])

            poly = xy[2:]
            x.extend(poly)

            cols = len(poly)
            lines += 1
            if lines > 150:  # cap the number of rows read
                break

    return np.reshape(x, (lines, cols)), np.reshape(y, (lines, 1))


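# A sketch of the input format read_data() assumes (hypothetical values):
# one sample per line; the first field is ignored, the second is the class
# label, and the rest are features, e.g.
#
#   1 0 0.12 3.40 1.87
#   2 2 1.05 0.77 2.61

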
def target(theta, x, y, sign=1.0):
    # Dual SVM objective; called with sign=-1.0 so that scipy's minimizer
    # maximizes it.
    y = np.ravel(y)
    m = len(y)
    res = 0.0
    for i in range(m):
        for j in range(m):
            res += theta[i] * theta[j] * y[i] * y[j] * np.dot(x[i], x[j])

    return sign * (np.sum(theta) - res / 2.0)


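# target() evaluates the standard soft-margin dual
#
#   W(theta) = sum_i theta_i
#            - 1/2 * sum_{i,j} theta_i * theta_j * y_i * y_j * <x_i, x_j>
#
# which is maximized subject to 0 <= theta_i <= C (see bounds()) and
# sum_i theta_i * y_i = 0 (see constraints()).

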
def jacobian(theta, x, y, sign=1.0):
    y = np.ravel(y)
    jac = []
    m = len(y)
    for i in range(m):
        t = 1.0
        for j in range(m):
            t -= theta[j] * y[i] * y[j] * np.dot(x[i], x[j])

        jac.append(sign * t)

    return np.array(jac)


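# jacobian() returns the gradient of W, component-wise
#
#   dW/dtheta_i = 1 - y_i * sum_j theta_j * y_j * <x_i, x_j>.
#
# A vectorized alternative to the two functions above (a sketch, not wired
# into the optimizer): precomputing Q once removes the O(m^2) Python loops
# from every objective/gradient evaluation.
def target_vectorized(theta, x, y, sign=1.0):
    q = np.dot(y, y.T) * np.dot(x, x.T)  # Q[i, j] = y_i * y_j * <x_i, x_j>
    return sign * (np.sum(theta) - 0.5 * np.dot(theta, np.dot(q, theta)))


def jacobian_vectorized(theta, x, y, sign=1.0):
    q = np.dot(y, y.T) * np.dot(x, x.T)
    return sign * (np.ones(len(theta)) - np.dot(q, theta))

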
def constraints(y):
    # Equality constraint of the dual: sum_i theta_i * y_i = 0.
    return (
        {'type': 'eq',
         'fun': lambda theta: np.dot(theta.T, y),
         'jac': lambda theta: y.T}
    )


def bounds(m, c):
    # Box constraints of the dual: 0 <= theta_i <= C for every multiplier.
    return tuple((0, c) for _ in range(m))


def optimize_dual(x, y, c):
    m = len(y)
    print("Optimization begin")
    res = optimize.minimize(
        fun=target,
        x0=np.zeros(m),
        args=(x, y, -1.0),  # sign=-1.0 turns maximization into minimization
        jac=jacobian,
        constraints=constraints(y),
        bounds=bounds(m, c),
        method='SLSQP',
        options={'disp': True}
    )

    return np.reshape(res.x, (len(res.x), 1))


def solve_straight(theta, x, y):
    # Recover the primal weight vector from the dual solution:
    # beta = sum_i theta_i * y_i * x_i. Since add_ones() prepends a bias
    # column to x, the intercept is already contained in beta.
    m = len(y)
    beta = np.zeros((1, x.shape[1]))
    for i in range(m):
        beta += theta[i] * y[i] * x[i]

    return beta.T


def svm(x, y, c):
    theta = optimize_dual(x, y, c)
    return solve_straight(theta, x, y)


def svm_hypothesis(beta, x):
    # Signed decision: which side of the separating hyperplane x falls on.
    h = np.dot(x, beta)
    return 1.0 if h > 0 else -1.0


# one vs all
def multi_svm(x, y, c):
    cnt_classes = count_classes(y)
    betas = np.zeros((cnt_classes, x.shape[1]))

    for i in range(cnt_classes):
        # Relabel to {-1, +1}: +1 for the current class, -1 for the rest,
        # as the dual formulation requires.
        yt = np.zeros(y.shape)
        for j in range(len(y)):
            yt[j] = 1.0 if int(y[j]) == i else -1.0

        print("Left: ", cnt_classes - i)
        beta = svm(x, yt, c)
        betas[i] = beta.ravel()

    return betas


def predict_multi_svm(x, betas):
    # One-vs-all prediction: assign the class whose hyperplane yields the
    # largest signed margin.
    num = x.shape[0]
    p = np.zeros(num)
    for i in range(num):
        margins = [np.dot(x[i], betas[c]) for c in range(len(betas))]
        p[i] = np.argmax(margins)

    return p


def count_classes(y):
    # Labels are assumed to be 0..K-1, so K = max + 1.
    return int(np.max(y)) + 1


def normalize_params(x):
    # Per-feature mean and standard deviation of the training set.
    return np.mean(x, axis=0), np.std(x, axis=0)


def normalize(x, means, stds):
    # Standardize in place; constant features (std == 0) are left as-is and
    # removed later by drop_with_zero_dev().
    for i in range(x.shape[1]):
        if stds[i] > 0:
            x[:, i] -= means[i]
            x[:, i] /= stds[i]

    return x


def add_ones(x):
    # Prepend a bias column of ones.
    return np.hstack([np.ones((x.shape[0], 1)), x])


def drop_with_zero_dev(x, stds):
    # Remove constant features; `shift` compensates for indices moving left
    # after each deletion.
    shift = 0
    for i in range(len(stds)):
        if stds[i] == 0:
            x = np.delete(x, i - shift, 1)
            shift += 1

    return x


def cohen(p, y):
    # Cohen's kappa: observed agreement corrected for chance agreement.
    cnt = len(p)

    agreement = 0.0
    classes = count_classes(y)
    info_p = np.zeros(classes)
    info_y = np.zeros(classes)

    for i in range(cnt):
        if p[i] == y[i]:
            agreement += 1
        info_p[int(p[i])] += 1
        info_y[int(y[i])] += 1

    f_cnt2 = float(cnt) * cnt
    pr_a = agreement / float(cnt)
    pr_e = 0.0
    for i in range(classes):
        pr_e += info_p[i] * info_y[i] / f_cnt2

    return (pr_a - pr_e) / (1 - pr_e)


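# cohen() computes kappa = (Pr(a) - Pr(e)) / (1 - Pr(e)), where Pr(a) is the
# observed agreement between p and y, and Pr(e) is the agreement expected by
# chance from the two marginal class distributions.

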
def summarize_svm(learn_x, learn_y, test_x, test_y):
    betas = multi_svm(learn_x, learn_y, c=1.0)
    predicted = predict_multi_svm(test_x, betas)

    kappa = cohen(predicted, test_y)

    print("Cohen's kappa: " + str(kappa))


learn_x, learn_y = read_data()
test_x, test_y = read_data('test.txt')

# Standardize both sets with statistics computed on the training set only.
means, stds = normalize_params(learn_x)

learn_x = normalize(learn_x, means, stds)
test_x = normalize(test_x, means, stds)

learn_x = drop_with_zero_dev(learn_x, stds)
test_x = drop_with_zero_dev(test_x, stds)

learn_x = add_ones(learn_x)
test_x = add_ones(test_x)

summarize_svm(learn_x, learn_y, test_x, test_y)
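
# A minimal smoke test of the binary path (hypothetical data, not run by
# default): two linearly separable points should land on opposite sides of
# the learned hyperplane.
#
#   x_demo = add_ones(np.array([[0.0, 1.0], [1.0, 0.0]]))
#   y_demo = np.array([[1.0], [-1.0]])
#   beta_demo = svm(x_demo, y_demo, c=1.0)
#   print(svm_hypothesis(beta_demo, x_demo[0]))  # expected: 1.0
#   print(svm_hypothesis(beta_demo, x_demo[1]))  # expected: -1.0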