Advertisement
Guest User

Untitled

a guest
May 20th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.65 KB | None | 0 0
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. import pandas as pd
  4.  
  5.  
  6. def power_iteration(A, max_iter):
  7. d = np.random.randn(A.shape[1], 1)
  8. x_i = d
  9. for i in range(max_iter):
  10. x_i = A.dot(x_i) / np.linalg.norm(x_i)
  11. lambda_max = x_i.T.dot(A.dot(x_i)) / x_i.T.dot(x_i)
  12. return lambda_max
  13.  
  14.  
  15. def load_mnist(label1, label2):
  16. raw_train_data = pd.read_csv('data/mnist_train.csv')
  17. raw_test_data = pd.read_csv('data/mnist_test.csv')
  18. numpy_train_data = raw_train_data.as_matrix()
  19. numpy_test_data = raw_test_data.as_matrix()
  20. X_train = numpy_train_data[:, 1:]
  21. Y_train = numpy_train_data[:, 0].reshape((numpy_train_data.shape[0],))
  22. X_test = numpy_test_data[:, 1:]
  23. Y_test = numpy_test_data[:, 0].reshape((numpy_test_data.shape[0],))
  24. X_train_label1 = X_train[Y_train == label1, :]
  25. X_train_label2 = X_train[Y_train == label2, :]
  26. del X_train
  27. Y_train_label1 = Y_train[Y_train == label1]
  28. Y_train_label2 = Y_train[Y_train == label2]
  29. del Y_train
  30. X_train = np.row_stack((X_train_label1, X_train_label2))
  31. Y_train = np.row_stack((Y_train_label1.reshape((-1, 1)),
  32. Y_train_label2.reshape((-1, 1))))
  33.  
  34. Y_train[Y_train == label1] = 1
  35. Y_train[Y_train == label2] = 0
  36. perm = np.random.permutation(np.arange(X_train.shape[0]))
  37. X_train = X_train[perm, :]
  38. Y_train = Y_train[perm, :]
  39. if X_train.shape[0] >= 30000:
  40. X_train = X_train[:30000, :]
  41. Y_train = Y_train[:30000, :]
  42.  
  43. X_test_label1 = X_test[Y_test == label1, :]
  44. X_test_label2 = X_test[Y_test == label2, :]
  45. del X_test
  46. Y_test_label1 = Y_test[Y_test == label1]
  47. Y_test_label2 = Y_test[Y_test == label2]
  48. del Y_test
  49. X_test = np.row_stack((X_test_label1, X_test_label2))
  50. Y_test = np.row_stack((Y_test_label1.reshape((-1, 1)), Y_test_label2.reshape(-1, 1)))
  51. Y_test[Y_test == label1] = 1
  52. Y_test[Y_test == label2] = 0
  53. perm = np.random.permutation(np.arange(X_test.shape[0]))
  54. X_test = X_test[perm, :]
  55. Y_test = Y_test[perm, :]
  56.  
  57. return X_train.T / 255, Y_train, X_test.T / 255, Y_test
  58.  
  59.  
  60. def sigmoind(V):
  61. return 1 / (1 + np.exp(-1 * V+1e-5))
  62.  
  63.  
  64. def Cost(X, C, W):
  65. m = X.shape[1]
  66. A = sigmoind(np.matmul(X.T, W))
  67. cost = (-1 / m) * (np.dot(C.T, np.log(A)) +
  68. np.dot((1 - C).T, np.log(1 - A)))
  69. return cost
  70.  
  71.  
  72. def Gradient(X, C, W):
  73. m = X.shape[1]
  74. A = sigmoind(np.matmul(X.T, W))
  75. dW = (1 / m) * np.dot(X, A - C)
  76. return dW / np.linalg.norm(dW)
  77.  
  78.  
  79. def Hassian(X, C, W):
  80. m = X.shape[1]
  81. A = sigmoind(np.matmul(X.T, W))
  82. D = np.diag((A * (1 - A)).reshape((-1)))
  83. hassian = (1 / m) * np.matmul(X, np.matmul(D, X.T))
  84. return hassian
  85.  
  86.  
  87. def F_A(X, C, W):
  88. return Cost(X, C, W), Gradient(X, C, W), Hassian(X, C, W)
  89.  
  90.  
  91. def test(d, X, C, W, func, abs_type, title, epsilon, max_iter):
  92. f_x = Cost(X, C, W)
  93. # import pdb
  94. # pdb.set_trace()
  95. epsilons = [epsilon ** i for i in range(1, max_iter + 1)]
  96. tests = np.zeros((max_iter, 2))
  97. for i in range(0, max_iter):
  98. f_x_d = Cost(X + epsilons[i] * d, C, W)
  99. f_x_d_g = f_x + np.dot((epsilons[i] * d.T), func(X, C, W))
  100. tests[i, 0] = np.abs(f_x_d - f_x) # you probably mean abs_type?
  101. tests[i, 1] = abs_type(f_x_d - f_x_d_g)
  102.  
  103. plt.semilogy(range(1, max_iter + 1), tests[:, 0], 'r')
  104. plt.title(title + ': linear result')
  105. plt.legend(['linear result'])
  106. plt.xlabel('iterations')
  107. plt.ylabel('results')
  108. plt.savefig('imgs/{} linear result.png'.format(title))
  109. plt.show()
  110.  
  111. plt.plot(range(1, max_iter + 1), tests[:, 1], 'b')
  112. plt.title(title + ': quadratic result')
  113. plt.legend(['quadratic result'])
  114. plt.xlabel('iterations')
  115. plt.ylabel('results')
  116. plt.savefig('imgs/{} quadratic result.png'.format(title))
  117. plt.show()
  118.  
  119.  
  120. def Armijo_linesearch(X, C, W, gradient, direction, alpha=1, beta=0.5, c=1e-4, max_iter=4):
  121. for j in range(max_iter):
  122. if Cost(X, C, W + alpha * direction) <= Cost(X, C, W) + c * alpha * gradient.T.dot(direction):
  123. return alpha
  124. else:
  125. alpha = beta * alpha
  126. return alpha
  127.  
  128.  
  129. def Steepest_Descent_model(X_train, Y_train, X_test, Y_test, W, labels, max_iter):
  130. label1, label2 = labels
  131. costs_train = []
  132. costs_test = []
  133. train_cost = 0.0
  134. test_cost = 0.0
  135. for i in range(max_iter):
  136. train_cost, grad = Cost(X_train, Y_train, W), Gradient(X_train, Y_train, W)
  137. train_cost = train_cost.reshape((-1))
  138. test_cost = Cost(X_test, Y_test, W).reshape((-1))
  139. print('Training cost after {} iterations is {}'.format(i, train_cost))
  140. print('Test cost after {} iterations is {}'.format(i, test_cost))
  141. print()
  142. if train_cost < 1e-10:
  143. break
  144. costs_train.append(train_cost)
  145. costs_test.append(test_cost)
  146. alpha = Armijo_linesearch(X_train, Y_train, W, grad, grad)
  147. W = W - alpha * grad
  148.  
  149. costs_train = np.array(costs_train)
  150. costs_train = np.abs(costs_train-train_cost)
  151. costs_test = np.array(costs_test)
  152. costs_test = np.abs(costs_test - test_cost)
  153. plt.semilogy(costs_train, 'r')
  154. plt.semilogy(costs_test, 'b')
  155. plt.title('Steepest Descent: Training and Testing Convergence History')
  156. plt.legend(['Train Logistic Cost', 'Test Logistic Cost'])
  157. plt.xlabel('iterations')
  158. plt.ylabel('Cost')
  159. plt.savefig('imgs/Steepest Descent-Training and Testing {} vs {}.png'.format(label1, label2))
  160. plt.show()
  161. return W
  162.  
  163.  
  164.  
  165. def is_pos_def(x):
  166. return np.all(np.linalg.eigvals(x) > 0)
  167.  
  168. def Newton_model(X_train, Y_train, X_test, Y_test, W, labels, max_iter):
  169. label1, label2 = labels
  170. costs_train = []
  171. costs_test = []
  172. for i in range(max_iter):
  173. train_cost, grad, hassian = F_A(X_train, Y_train, W)
  174. train_cost = train_cost.reshape((-1))
  175. test_cost = Cost(X_test, Y_test, W).reshape((-1))
  176. print('Newton: Training cost after {} iterations is {}'.format(i, train_cost))
  177. print('Test cost after {} iterations is {}'.format(i, test_cost))
  178. # print(is_pos_def(hassian + 0.5 * np.eye(hassian.shape[0])))
  179.  
  180. print()
  181. costs_train.append(train_cost)
  182. costs_test.append(test_cost)
  183. W = W - Armijo_linesearch(X_train, Y_train, W, grad, grad)*np.linalg.pinv(hassian + 0.009 * np.eye(hassian.shape[0])).dot(grad)
  184.  
  185.  
  186. costs_train = np.array(costs_train)
  187. costs_train = np.abs(costs_train-train_cost)
  188. costs_test = np.array(costs_test)
  189. costs_test = np.abs(costs_test - test_cost)
  190. plt.semilogy(costs_train, 'r')
  191. plt.semilogy(costs_test, 'b')
  192. plt.title('Newton: Training and Testing Convergence History')
  193. plt.legend(['Train Logistic Cost', 'Test Logistic Cost'])
  194. plt.xlabel('iterations')
  195. plt.ylabel('Cost')
  196. plt.savefig('imgs/Newton Training and Testing {} vs {}.png'.format(label1, label2))
  197. plt.show()
  198.  
  199. return W
  200.  
  201.  
  202. def predict(X, W):
  203. return sigmoind(X.T.dot(W)) >= 0.5
  204.  
  205.  
  206. if __name__ == '__main__':
  207. X_train, Y_train, X_test, Y_test = load_mnist(0, 1)
  208. W = np.zeros((X_train.shape[0], 1)) + 1e-5
  209. # F_A(X_train, Y_train, W)
  210. # d = np.random.randn(X_train.shape[0], 1)
  211. # test(d, X_train, Y_train, W, Gradient, np.abs, 'Gradient Test', 0.5, 20)
  212. # test(d, X_train, Y_train, W, Hassian, np.linalg.norm, 'Hassian Test', 0.5, 20)
  213. # W = Steepest_Descent_model(X_train, Y_train, X_test, Y_test, W, [0, 1], 100)
  214. W = Newton_model(X_train, Y_train, X_test, Y_test, W, [0, 1], 45)
  215. # print(np.mean(Y_train == predict(X_train, W)))
  216. # print(np.mean(Y_test == predict(X_test, W)))
  217. # exit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement