import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from numpy import linalg as la
from scipy.sparse import spdiags


def jacobi(A, b, x0, max_iter, e, w=1.0):
    # Weighted (damped) Jacobi: x_k = (1 - w) * x_{k-1} + w * D^{-1} (b - (L + U) x_{k-1}).
    residual_norm = []
    convergence_factor = []

    k = 0
    x = {k: x0}

    D = np.diagflat(np.diag(A))
    LU = A - D
    for k in range(1, max_iter):
        x[k] = (1 - w) * x[k - 1] + w * np.matmul(la.inv(D), b - np.matmul(LU, x[k - 1]))

        residual_norm.append(la.norm(np.matmul(A, x[k]) - b, ord=2))
        convergence_factor.append(la.norm(np.matmul(A, x[k]) - b, ord=2) / la.norm(np.matmul(A, x[k - 1]) - b, ord=2))

        # Stop once the relative residual drops below the tolerance e.
        if la.norm(np.matmul(A, x[k]) - b, ord=2) / la.norm(b, ord=2) < e:
            break
    return x[k], residual_norm, convergence_factor, k


def gauss_seidel(A, b, x0, max_iter, e):
    # Gauss-Seidel: x_k = (D + L)^{-1} (b - U x_{k-1}), with D + L the lower triangle of A.
    residual_norm = []
    convergence_factor = []

    k = 0
    x = {k: x0}

    DL = np.tril(A)
    U = A - DL
    for k in range(1, max_iter):
        x[k] = np.matmul(la.inv(DL), b - np.matmul(U, x[k - 1]))

        residual_norm.append(la.norm(np.matmul(A, x[k]) - b, ord=2))
        convergence_factor.append(la.norm(np.matmul(A, x[k]) - b, ord=2) / la.norm(np.matmul(A, x[k - 1]) - b, ord=2))

        if la.norm(np.matmul(A, x[k]) - b, ord=2) / la.norm(b, ord=2) < e:
            break
    return x[k], residual_norm, convergence_factor, k


def steepest_descent(A, b, x0, max_iter, e):
    # Steepest descent for SPD A: step along the residual with the exact line-search step size.
    residual_norm = []
    convergence_factor = []

    k = 0
    r = {}
    x = {k: x0}

    r[0] = b - np.matmul(A, x[0])
    for k in range(1, max_iter):
        alpha = np.dot(r[k - 1], r[k - 1]) / np.dot(r[k - 1], np.matmul(A, r[k - 1]))
        x[k] = x[k - 1] + alpha * r[k - 1]
        r[k] = b - np.matmul(A, x[k])

        residual_norm.append(la.norm(r[k], ord=2))
        convergence_factor.append(la.norm(r[k], ord=2) / la.norm(r[k - 1], ord=2))

        if la.norm(r[k], ord=2) / la.norm(b, ord=2) < e:
            break
    return x[k], residual_norm, convergence_factor, k


def conjugate_gradient(A, b, x0, max_iter, e):
    # Conjugate gradient for SPD A: the beta update keeps the search directions A-conjugate.
    residual_norm = []
    convergence_factor = []

    k = 0
    p = {}
    r = {}
    x = {k: x0}

    p[0] = b - np.matmul(A, x[0])
    r[0] = b - np.matmul(A, x[0])
    for k in range(1, max_iter):
        Ap_k_prev = np.matmul(A, p[k - 1])
        alpha = np.dot(r[k - 1], p[k - 1]) / np.dot(p[k - 1], Ap_k_prev)
        x[k] = x[k - 1] + alpha * p[k - 1]
        r[k] = b - np.matmul(A, x[k])

        residual_norm.append(la.norm(r[k], ord=2))
        convergence_factor.append(la.norm(r[k], ord=2) / la.norm(r[k - 1], ord=2))

        if la.norm(r[k], ord=2) / la.norm(b, ord=2) < e:
            break
        beta = -np.dot(r[k], Ap_k_prev) / np.dot(p[k - 1], Ap_k_prev)
        p[k] = r[k] + beta * p[k - 1]
    return x[k], residual_norm, convergence_factor, k


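# A minimal usage sketch (added; the small SPD system below is an assumed example,
# not part of the original exercise) comparing conjugate_gradient with a direct solve:
#   A_spd = np.array([[4.0, 1.0, 0.0], [1.0, 3.0, 1.0], [0.0, 1.0, 2.0]])
#   b_spd = np.array([1.0, 2.0, 3.0])
#   x_cg = conjugate_gradient(A_spd, b_spd, np.zeros(3), 100, 1e-10)[0]
#   assert np.allclose(x_cg, la.solve(A_spd, b_spd), atol=1e-6)

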
def GMRES(A, b, x0, max_iter, e):
    # Despite the name, this is the one-dimensional minimal-residual iteration (GMRES restarted
    # every step): alpha minimizes ||b - A(x + alpha * r)|| along the current residual r.
    residual_norm = []
    convergence_factor = []

    k = 0
    r = {}
    x = {k: x0}

    r[0] = b - np.matmul(A, x[0])
    for k in range(1, max_iter):
        Ar_k = np.matmul(A, r[k - 1])
        alpha = np.dot(r[k - 1], Ar_k) / np.dot(Ar_k, Ar_k)
        x[k] = x[k - 1] + alpha * r[k - 1]
        r[k] = b - np.matmul(A, x[k])

        residual_norm.append(la.norm(r[k], ord=2))
        convergence_factor.append(la.norm(r[k], ord=2) / la.norm(r[k - 1], ord=2))

        if la.norm(r[k], ord=2) / la.norm(b, ord=2) < e:
            break
    return x[k], residual_norm, convergence_factor, k


def sanity_check():
    A = np.array([[1, 2, -3],
                  [2, -5, 1],
                  [-3, 1, 5]])
    b = np.array([-20, -25, 70])
    x0 = np.array([0, 0, 0])
    # x = np.array([5, 10, 15])
    max_iter = 1000
    e = 10e-5

    res = {1: jacobi(A, b, x0, max_iter, e)[0],
           2: gauss_seidel(A, b, x0, max_iter, e)[0],
           3: steepest_descent(A, b, x0, max_iter, e)[0],
           4: conjugate_gradient(A, b, x0, max_iter, e)[0]}
    print(res)


def plot_result(title, result, filename):
    residual_norm = result[1]
    convergence_factor = result[2]
    iterations = range(result[3])

    fig, axs = plt.subplots(2, 1, constrained_layout=True)
    fig.suptitle(title, fontsize=16)

    axs[0].semilogy(iterations, residual_norm)
    axs[0].set_title('Residual Norm')
    axs[0].set_xlabel('Iterations')
    axs[0].set_ylabel(r'$||Ax^{(k)}-b||$')

    axs[1].semilogy(iterations, convergence_factor)
    axs[1].set_title('Convergence Factor')
    axs[1].set_xlabel('Iterations')
    axs[1].set_ylabel(r'$\frac{||Ax^{(k)}-b||}{||Ax^{(k-1)}-b||}$')

    fig.savefig(f"resources/{filename}.pdf", bbox_inches='tight')
    plt.show(block=True)


def q1():
    # Tridiagonal test problem: 2.1 on the diagonal, -1 on the off-diagonals (strictly diagonally dominant).
    n = 100
    data = np.array([-np.ones(n), 2.1 * np.ones(n), -np.ones(n)])
    diags = np.array([-1, 0, 1])
    A = spdiags(data, diags, n, n).toarray()
    b = np.random.rand(n)
    x0 = np.zeros(n)
    max_iter = 100
    e = 1e-5

    results = {
        "Jacobi w=1.0": jacobi(A, b, x0, max_iter, e, 1.0),
        "Jacobi w=0.75": jacobi(A, b, x0, max_iter, e, 0.75),
        "Gauss Seidel": gauss_seidel(A, b, x0, max_iter, e),
        "Steepest Descent": steepest_descent(A, b, x0, max_iter, e),
        "Conjugate Gradient": conjugate_gradient(A, b, x0, max_iter, e)
    }

    i = 0
    for title, result in results.items():
        i += 1
        plot_result(title, result, f"Q1b{i}")


def q3():
    A = np.array([[5, 4, 4, -1, 0],
                  [3, 12, 4, -5, -5],
                  [-4, 2, 6, 0, 3],
                  [4, 5, -7, 10, 2],
                  [1, 2, 5, 3, 10]])
    b = np.transpose(np.array([1, 1, 1, 1, 1]))
    x0 = np.transpose(np.array([0, 0, 0, 0, 0]))
    max_iter = 50
    e = 1e-5

    plot_result("GMRES", GMRES(A, b, x0, max_iter, e), "Q3c")


def load_mnist(label1, label2):
    # Requires the TensorFlow 1.x API (tf.contrib was removed in TensorFlow 2.x).
    mnist = tf.contrib.learn.datasets.load_dataset("mnist")
    trainX = mnist.train.images
    trainY = np.asarray(mnist.train.labels, dtype=np.int32)
    testX = mnist.test.images
    testY = np.asarray(mnist.test.labels, dtype=np.int32)

    # Keep only the two requested digit classes.
    train_idx = [idx for idx, label in enumerate(trainY) if label == label1 or label == label2]
    test_idx = [idx for idx, label in enumerate(testY) if label == label1 or label == label2]

    trainX = trainX[train_idx]
    trainY = trainY[train_idx]
    testX = testX[test_idx]
    testY = testY[test_idx]

    # Remap the labels to {0, 1} through temporary values so the two assignments cannot collide.
    trainY[trainY == label1] = -1
    trainY[trainY == label2] = -2
    trainY[trainY == -1] = 0
    trainY[trainY == -2] = 1
    trainY = trainY.reshape(trainY.shape[0], 1)

    testY[testY == label1] = -1
    testY[testY == label2] = -2
    testY[testY == -1] = 0
    testY[testY == -2] = 1
    testY = testY.reshape(testY.shape[0], 1)

    # Transpose so each column is one image (features x samples).
    trainX = trainX.T
    testX = testX.T

    return trainX, trainY, testX, testY


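# Note (added): a rough TensorFlow 2.x replacement for the loader above (assumed, not part
# of the original) would be:
#   (trainX, trainY), (testX, testY) = tf.keras.datasets.mnist.load_data()
#   trainX = trainX.reshape(-1, 784).astype(np.float32) / 255.0
#   testX = testX.reshape(-1, 784).astype(np.float32) / 255.0
# followed by the same class filtering, relabeling, and transposition as in load_mnist.

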
def sigmoid(X):
    return 1 / (1 + np.exp(-1 * X))


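# Note (added): for very negative entries of X, np.exp(-X) overflows and emits a
# RuntimeWarning (the sigmoid value still rounds correctly to 0); a common safeguard,
# not used in the original, is to clip the argument, e.g. 1 / (1 + np.exp(-np.clip(X, -500, 500))).

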
def cost(X, C, W):
    # Logistic-regression (cross-entropy) cost, averaged over the m samples (columns of X).
    m = X.shape[1]
    A = sigmoid(np.matmul(X.T, W))

    return (-1 / m) * (np.dot(C.T, np.log(A)) +
                       np.dot((1 - C).T, np.log(1 - A)))


def gradient(X, C, W):
    m = X.shape[1]
    A = sigmoid(np.matmul(X.T, W))
    B = (1 / m) * np.dot(X, A - C)
    return B / la.norm(B, ord=2)


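# Note (added): gradient() returns the normalized direction B / ||B||_2 rather than the raw
# gradient, so in gradient_descent_model below the step length comes entirely from the
# Armijo line search.

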
def hessian(X, C, W):
    m = X.shape[1]
    A = sigmoid(np.matmul(X.T, W))
    D = np.diag((A * (1 - A)).reshape((-1)))
    return (1 / m) * np.matmul(X, np.matmul(D, X.T))


def gradient_test(X, C, W, d, epsilon, max_iter, title, filename):
    f = cost(X, C, W)
    linear = []
    quadratic = []

    for i in range(0, max_iter):
        e = epsilon ** i
        f_delta = cost(X + (e * d), C, W)
        linear.append(np.abs(f_delta - f)[0])
        quadratic.append(np.abs(f_delta - f - np.dot((e * d.T), gradient(X, C, W)))[0])

    plt.semilogy(range(1, max_iter + 1), linear, 'r')
    plt.title(title + ' (linear)')
    plt.xlabel('Iterations')
    plt.ylabel(r'$|f(x + \epsilon d) - f(x)|$')
    plt.savefig(f"resources/{filename}_linear.pdf", bbox_inches='tight')
    plt.show()

    plt.plot(range(1, max_iter + 1), quadratic, 'b')
    plt.title(title + ' (quadratic)')
    plt.xlabel('Iterations')
    plt.ylabel(r'$|f(x + \epsilon d) - f(x) - \epsilon d^T grad(x)|$')
    plt.savefig(f"resources/{filename}_quadratic.pdf", bbox_inches='tight')
    plt.show()


def hessian_test(X, C, W, d, epsilon, max_iter, title, filename):
    f = cost(X, C, W)
    linear = []
    quadratic = []

    for i in range(0, max_iter):
        e = epsilon ** i
        f_delta = cost(X + (e * d), C, W)
        linear.append(la.norm(f_delta - f))
        quadratic.append(la.norm(f_delta - f - np.dot((e * d.T), hessian(X, C, W))))

    plt.semilogy(range(1, max_iter + 1), linear, 'r')
    plt.title(title + ' (linear)')
    plt.xlabel('Iterations')
    plt.ylabel(r'$||f(x + \epsilon d) - f(x)||$')
    plt.savefig(f"resources/{filename}_linear.pdf", bbox_inches='tight')
    plt.show()

    plt.plot(range(1, max_iter + 1), quadratic, 'b')
    plt.title(title + ' (quadratic)')
    plt.xlabel('Iterations')
    plt.ylabel(r'$||f(x + \epsilon d) - f(x) - JacMV(x, \epsilon d)||$')
    plt.savefig(f"resources/{filename}_quadratic.pdf", bbox_inches='tight')
    plt.show()


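# Note (added, for reference): the standard derivative checks are usually written with the
# perturbation applied to the weights W rather than to X:
#   |f(W + eps*d) - f(W)|                            = O(eps)      (gradient test, linear term)
#   |f(W + eps*d) - f(W) - eps * d^T grad f(W)|      = O(eps^2)    (gradient test, quadratic term)
#   ||grad f(W + eps*d) - grad f(W) - eps * H(W) d|| = O(eps^2)    (Jacobian / Hessian test)
# The tests above perturb X and use the normalized direction returned by gradient(), so the
# observed decay rates can differ from these reference rates.

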
def q5b():
    trainX, trainY, testX, testY = load_mnist(0, 1)
    m = testX.shape[0]
    n = testX.shape[1]
    W = np.zeros((m, 1)) + 1e-5
    d = np.random.randn(m, 1)
    epsilon = 0.5
    max_iter = 30

    gradient_test(testX, testY, W, d, epsilon, max_iter, "Gradient test", "Q5b1")
    hessian_test(testX, testY, W, d, epsilon, max_iter, "Jacobian test", "Q5b2")


def armijo_linesearch(X, C, W, grad, d, max_iter=4, alpha0=1.0, beta=0.5, c=1e-4):
    alpha = alpha0
    for i in range(max_iter):
        if cost(X, C, W + alpha * d) <= cost(X, C, W) + c * alpha * np.dot(grad.T, d):
            return alpha
        else:
            alpha = beta * alpha
    return alpha


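# Note (added): armijo_linesearch backtracks alpha (multiplying by beta = 0.5, at most
# max_iter times) until the Armijo sufficient-decrease condition
#   cost(W + alpha * d) <= cost(W) + c * alpha * grad^T d
# holds, and returns the last alpha tried if it never does.

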
def gradient_descent_model(trainX, trainY, testX, testY, W, epsilon, max_iter, label, filename):
    train_costs = []
    test_costs = []

    train_cost = 0.0
    test_cost = 0.0
    for i in range(max_iter):
        train_cost = cost(trainX, trainY, W).reshape((-1))
        g = gradient(trainX, trainY, W)
        test_cost = cost(testX, testY, W).reshape((-1))
        if train_cost < epsilon:
            break

        train_costs.append(train_cost)
        test_costs.append(test_cost)
        alpha = armijo_linesearch(trainX, trainY, W, g, g)
        W = W - alpha * g

    # Plot the gap between each iterate's cost and the final cost on a log scale.
    train_costs = np.abs(np.array(train_costs) - train_cost)
    test_costs = np.abs(np.array(test_costs) - test_cost)
    plt.semilogy(train_costs, 'r')
    plt.semilogy(test_costs, 'b')
    plt.title('Gradient Descent Convergence: ' + label)
    plt.legend(['Train', 'Test'])
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.savefig(f'resources/{filename}.pdf')
    plt.show()
    return W


def newton_model(trainX, trainY, testX, testY, W, epsilon, max_iter, label, filename):
    train_costs = []
    test_costs = []

    train_cost = 0.0
    test_cost = 0.0
    for i in range(max_iter):
        print(f'iter: {i}')
        train_cost = cost(trainX, trainY, W).reshape((-1))
        grad = gradient(trainX, trainY, W)
        hess = hessian(trainX, trainY, W)
        test_cost = cost(testX, testY, W).reshape((-1))
        if train_cost < epsilon:
            break

        train_costs.append(train_cost)
        test_costs.append(test_cost)
        # Damped Newton step: the Hessian is regularized with 0.009 * I before (pseudo-)inversion.
        W = W - armijo_linesearch(trainX, trainY, W, grad, grad) * \
            np.linalg.pinv(hess + 0.009 * np.eye(hess.shape[0])).dot(grad)

    # Plot the gap between each iterate's cost and the final cost on a log scale.
    train_costs = np.abs(np.array(train_costs) - train_cost)
    test_costs = np.abs(np.array(test_costs) - test_cost)
    plt.semilogy(train_costs, 'r')
    plt.semilogy(test_costs, 'b')
    plt.title('Newton Convergence: ' + label)
    plt.legend(['Train', 'Test'])
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.savefig(f'resources/{filename}.pdf')
    plt.show()

    return W


def q5c():
    epsilon = 1e-10
    max_iter = 100

    trainX, trainY, testX, testY = load_mnist(0, 1)
    m = testX.shape[0]
    W = np.zeros((m, 1)) + 1e-5
    W_gd = gradient_descent_model(trainX, trainY, testX, testY, W, epsilon, max_iter, "0/1", "Q5c1a")
    W_n = newton_model(trainX, trainY, testX, testY, W, epsilon, 15, "0/1", "Q5c1b")

    trainX, trainY, testX, testY = load_mnist(8, 9)
    m = testX.shape[0]
    W = np.zeros((m, 1)) + 1e-5
    W_gd = gradient_descent_model(trainX, trainY, testX, testY, W, epsilon, max_iter, "8/9", "Q5c2a")
    W_n = newton_model(trainX, trainY, testX, testY, W, epsilon, 15, "8/9", "Q5c2b")


if __name__ == '__main__':
    plt.interactive(False)
    # q5b()
    q5c()