import numpy


def read_data(dataset='learn.txt'):
    # Each line of the data file holds whitespace-separated numbers:
    # column 0 is ignored, column 1 is the class label, and columns 2..
    # are the raw features.
    y = []
    x = []
    lines = 0
    cols = 0
    with open(dataset) as f:
        for line in f:
            xy = [float(i) for i in line.split()]
            y.append(xy[1])

            # expand the raw features with their squares
            poly = create_quadratic(xy[2:])
            # poly = xy[2:]
            x.extend(poly)
            lines += 1
            cols = len(poly)

    return numpy.reshape(y, (lines, 1)), numpy.reshape(x, (lines, cols))


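# The layout of learn.txt is inferred from read_data() above: one example
# per line, "col0 label f1 f2 ...", with column 0 unused. A hypothetical
# two-line file would look like
#
#   7 1 0.5 2.0
#   8 2 1.5 0.1
#
# i.e. labels (1 or 2) in column 1 and raw features from column 2 on.

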
def create_indicator_matrix(y):
    # One-hot encode integer class labels, assumed 0-based: row i gets a
    # 1.0 in column y[i].
    cnt = len(y)
    count_classes = int(max(y)) + 1
    ind = numpy.zeros((cnt, count_classes))
    for i in range(cnt):
        ind[i, int(y[i])] = 1.0

    return ind


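# Tiny self-check of the one-hot encoding (labels 0..2):
assert (create_indicator_matrix([0, 2, 1]) ==
        numpy.array([[1.0, 0.0, 0.0],
                     [0.0, 0.0, 1.0],
                     [0.0, 1.0, 0.0]])).all()

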
def linear_regression(x, y):
    # Closed-form least squares against the one-hot targets:
    # W = (X'X)^(-1) X' Y. The * and .I operators assume numpy.matrix
    # inputs; this function is never called by the script below.
    ind = create_indicator_matrix(y)
    return (x.T * x).I * x.T * ind


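# A sketch of the same fit without forming the inverse explicitly (an
# alternative, not part of the original paste); numpy.linalg.lstsq is
# better conditioned than (X'X)^(-1):
def linear_regression_lstsq(x, y):
    ind = create_indicator_matrix(y)
    w, _, _, _ = numpy.linalg.lstsq(x, ind, rcond=None)
    return w

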
def normalize(x):
    # Standardize every feature column to zero mean / unit variance and
    # prepend a bias column of ones. Column 0 is skipped (the loop starts
    # at 1), and constant columns (std == 0) are dropped; their indices
    # are returned in `removed`.
    xn = numpy.ones((x.shape[0], 1))
    m = x.shape[1]
    means = numpy.mean(x, axis=0)
    stds = numpy.std(x, axis=0)
    removed = []
    for i in range(1, m):
        if stds[i] > 0.0:
            x[:, i] -= means[i]
            x[:, i] /= stds[i]
            xn = numpy.hstack([xn, numpy.matrix(x[:, i]).T])
        else:
            removed.append(i)

    return xn, means, stds, removed


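# Hypothetical helper (not in the original paste): scale new data with
# the statistics normalize() returned, so a held-out set is standardized
# with the training means and stds rather than its own.
def apply_normalization(x, means, stds, removed):
    xn = numpy.ones((x.shape[0], 1))
    for i in range(1, x.shape[1]):
        if i in removed:
            continue
        col = (x[:, i] - means[i]) / stds[i]
        xn = numpy.hstack([xn, numpy.matrix(col).T])
    return xn

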
def create_quadratic(x):
    # Expand the feature list with the square of each feature.
    p = []
    for x1 in x:
        p.append(x1)
        p.append(x1 * x1)

    # Full pairwise products would be:
    # for x1 in x:
    #     for x2 in x:
    #         p.append(x1 * x2)

    return p


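# Worked example of the quadratic expansion:
assert create_quadratic([2.0, 3.0]) == [2.0, 4.0, 3.0, 9.0]

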
def sigmoid(m):
    # Element-wise logistic function; numpy.exp broadcasts over any shape.
    return 1.0 / (1.0 + numpy.exp(-m))


def cost(x, theta, y):
    # Logistic-regression cross-entropy:
    # J = -(1/m) * (y' * log(h) + (1 - y)' * log(1 - h)), h = sigmoid(X * theta)
    h = sigmoid(numpy.dot(x, theta))
    return -(numpy.dot(y.T, numpy.log(h)) + numpy.dot((1 - y).T, numpy.log(1 - h))) / len(y)


def grad(x, theta, y):
    # Gradient of the cross-entropy: (1/m) * X' * (h - y)
    return numpy.dot(x.T, sigmoid(numpy.dot(x, theta)) - y) / len(y)


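# Hypothetical sanity check (not in the original paste): compare the
# analytic gradient with central finite differences of cost(). Handy
# when changing cost() or grad(); never called below.
def check_gradient(x, theta, y, eps=1e-5):
    analytic = grad(x, theta, y)
    numeric = numpy.zeros(theta.shape)
    for j in range(theta.shape[0]):
        tp = theta.copy()
        tm = theta.copy()
        tp[j] += eps
        tm[j] -= eps
        numeric[j] = (cost(x, tp, y) - cost(x, tm, y)).item() / (2 * eps)
    return numpy.max(numpy.abs(analytic - numeric))

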
def gradient_descent(x, y, theta, alpha, num_iters):
    # Plain batch gradient descent with a fixed learning rate.
    for i in range(num_iters):
        if i % 50 == 0:
            print('Iterations left: ', num_iters - i)
        theta -= alpha * grad(x, theta, y)
        # print(cost(x, theta, y))

    return theta


def cohen(p, y):
    # Cohen's kappa for binary labels in {0, 1}:
    # kappa = (pra - pre) / (1 - pre), where pra is the observed
    # agreement and pre is the agreement expected by chance from the
    # marginal label frequencies.
    m = len(p)
    a = 0    # agreements
    a1 = 0   # predicted 0s
    a2 = 0   # actual 0s
    b1 = 0   # predicted 1s
    b2 = 0   # actual 1s
    for i in range(m):
        if p[i] == y[i][0]:
            a += 1

        a1 += 1 - p[i]
        b1 += p[i]

        a2 += 1 - y[i][0]
        b2 += y[i][0]

    f = float(m)
    pra = a / f
    pre = (a1 * a2) / (f * f) + (b1 * b2) / (f * f)
    # pre = 0.5

    return (pra - pre) / (1 - pre)


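# Worked example: p agrees with y on 3 of 4 items (pra = 0.75) and the
# marginals give a chance agreement pre = 0.5, so
# kappa = (0.75 - 0.5) / (1 - 0.5) = 0.5.
assert abs(cohen([1.0, 0.0, 1.0, 0.0], [[1.0], [0.0], [0.0], [0.0]]) - 0.5) < 1e-12

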
def predict(x, theta):
    # Hard 0/1 labels: threshold the sigmoid output at 0.5.
    num = x.shape[0]
    p = numpy.zeros(num)
    for i in range(num):
        s = sigmoid(numpy.dot(x[i], theta))
        if s[0, 0] >= 0.5:
            p[i] = 1

    return p


# --- main script ---
y, x = read_data()
x, means, stds, removed = normalize(x)

# Count the positive examples and remap label 2 -> 0, turning the raw
# {1, 2} labels into the {0, 1} values logistic regression expects.
t = 0
for i in range(len(y)):
    if y[i] == 1:
        t += 1

    if y[i] == 2:
        y[i] = 0

theta = numpy.zeros((x.shape[1], 1))

initial_cost = cost(x, theta, y)
print('Initial cost: ', initial_cost)
print('Initial cohen: ', cohen(predict(x, theta), y))

theta = gradient_descent(x, y, theta, alpha=1.0, num_iters=350)

# The gradient at the fitted theta should be near zero if descent
# has converged.
print('Test:')
print(grad(x, theta, y))

p = predict(x, theta)

print('Estimated cost: ', cost(x, theta, y))
print("Cohen's kappa: ", cohen(p, y))

print(p)
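

# Hypothetical follow-up (not in the original paste): score a held-out
# file with the fitted theta, reusing the training-set scaling. Assumes
# a test.txt in the same format as learn.txt; never called here.
def predict_file(theta, means, stds, removed, dataset='test.txt'):
    y_test, x_test = read_data(dataset)
    x_test = apply_normalization(x_test, means, stds, removed)
    return predict(x_test, theta)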