# Record Training Accuracy: 0.9976363636363637
# Record Test Accuracy: 0.9853

import numpy as np
import math
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
from random import sample
import csv
from PIL import Image  # unused in this script

num_node = [784, 200, 200, 200, 200, 10]
num_layer = len(num_node)
num_output_node = num_node[num_layer - 1]

Ntrain = 55000
Ntest = 10000
batch_size = 100

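# Architecture note: num_node defines a fully connected 784-200-200-200-200-10
# network. Every W[l] carries an extra first row for the bias unit, which is
# why the weight matrices below are shaped (num_node[l]+1, num_node[l+1]).
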
"""
# Sigmoid Function
# np.multiply(A[index], 1-A[index])
def g(X):
    return 1 / (1 + np.exp(-X))
"""
"""
# Tanh Function
# np.multiply(1+A[index], 1-A[index])
def g(X):
    tanhX = np.exp(2 * X)
    return np.divide(tanhX - 1, tanhX + 1)
"""
"""
# ReLU
# 1 * (A[index] > 0)
def g(X):
    return np.maximum(X, 0)
"""

# Leaky ReLU
# 0.99 * (A[index] > 0) + 0.01
def g(X):
    return np.maximum(X, 0.01 * X)

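# The commented expression above each activation (e.g. 0.99 * (A[index] > 0) + 0.01)
# is that activation's derivative written in terms of the stored activations
# A[index]; BackPropagation below uses the Leaky ReLU version directly.
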
# Softmax
def softmax(X):
    N, d = X.shape
    return np.array([np.exp(X[i]) / np.sum(np.exp(X[i])) for i in range(N)])

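"""
# Numerically stable softmax (alternative sketch, not the version that
# produced the recorded accuracies): softmax is unchanged by subtracting a
# constant from each row, so shifting by the row max avoids overflow in np.exp.
def softmax(X):
    Xs = X - np.max(X, axis=1, keepdims=True)
    expX = np.exp(Xs)
    return expX / np.sum(expX, axis=1, keepdims=True)
"""
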
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Training data, read from MNIST
inputpix = []
output = []

for i in range(Ntrain):
    inputpix.append(np.array(mnist.train.images[i]))
    output.append(np.array(mnist.train.labels[i]))
inputpix = inputpix - np.mean(inputpix, axis=0)  # mean-centre each pixel

np.savetxt('input.txt', inputpix, delimiter=' ')
np.savetxt('output.txt', output, delimiter=' ')

# Test data
inputtestpix = []
outputtest = []

for i in range(Ntest):
    inputtestpix.append(np.array(mnist.test.images[i]))
    outputtest.append(np.array(mnist.test.labels[i]))
inputtestpix = inputtestpix - np.mean(inputtestpix, axis=0)

np.savetxt('inputtest.txt', inputtestpix, delimiter=' ')
np.savetxt('outputtest.txt', outputtest, delimiter=' ')

# Batch Normalization: https://kratzert.github.io/2016/02/12/understanding-the-gradient-flow-through-the-batch-normalization-layer.html

def batchnorm_forward(z, beta, gamma, eps):
    mu = np.mean(z, axis=0)
    xmu = z - mu
    sq = xmu ** 2
    var = np.mean(sq, axis=0)
    sqrtvar = np.sqrt(var + eps)
    ivar = 1 / sqrtvar
    xhat = xmu * ivar
    gammax = gamma * xhat
    out = gammax + beta
    cache = (xhat, gamma, xmu, ivar, sqrtvar, var, eps)
    return out, cache

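# Note: batchnorm_forward always normalizes with the statistics of the batch
# it is given; no running mean/variance is kept, so the evaluation calls below
# normalize the full train or test set with that set's own statistics.
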
def batchnorm_backward(dout, cache):
    xhat, gamma, xmu, ivar, sqrtvar, var, eps = cache
    N, D = dout.shape

    dbeta = np.sum(dout, axis=0)
    dgammax = dout

    dgamma = np.sum(dgammax * xhat, axis=0)
    dxhat = dgammax * gamma

    divar = np.sum(dxhat * xmu, axis=0)
    dxmu1 = dxhat * ivar

    dsqrtvar = -1 / (sqrtvar ** 2) * divar
    dvar = 0.5 * 1 / np.sqrt(var + eps) * dsqrtvar
    dsq = 1 / N * np.ones((N, D)) * dvar
    dxmu2 = 2 * xmu * dsq

    dx1 = dxmu1 + dxmu2
    dmu = -1 * np.sum(dxmu1 + dxmu2, axis=0)
    dx2 = 1 / N * np.ones((N, D)) * dmu
    dx = dx1 + dx2

    return dx, dgamma, dbeta

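# batchnorm_backward walks back through the forward graph one node at a time,
# following the linked post; a closed-form expression for dx would give the
# same result.
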
# Forwardpropagation
def h(W, X, beta, gamma, eps, batchnorm):
    a = X
    for l in range(num_layer - 1):
        a = np.insert(a, 0, 1, axis=1)  # prepend the bias unit
        z = np.dot(a, W[l])
        if batchnorm == 1:
            out, cache = batchnorm_forward(z, beta[l], gamma[l], eps)
        else:
            out = z
        if l == num_layer - 2:
            a = softmax(out)
        else:
            a = g(out)
    return a

# Cost Function
def J(y, W, X, Lambda, beta, gamma, eps, batchnorm):
    cost = 0
    H = h(W, X, beta, gamma, eps, batchnorm)
    for i in range(Ntrain):
        for k in range(num_output_node):
            cost = cost - y[i][k] * math.log(H[i][k])
    regularization = 0
    for l in range(num_layer - 1):
        for i in range(num_node[l]):
            for j in range(num_node[l + 1]):
                regularization = regularization + W[l][i + 1][j] ** 2
    return 1 / Ntrain * cost + Lambda / (2 * Ntrain) * regularization

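# J is the mean cross-entropy plus an L2 penalty. The i+1 index skips the
# first (bias) row of each W[l], so bias weights are not regularized.
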
# Backpropagation - confirmed to be correct
# Algorithm based on https://www.coursera.org/learn/machine-learning/lecture/1z9WW/backpropagation-algorithm
# Returns D, the value of the gradient
def BackPropagation(y, W, X, Lambda, beta, gamma, eps, dropout, batchnorm):
    delta = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        delta[l] = np.zeros((num_node[l] + 1, num_node[l + 1]))
    # Sample a mini-batch without replacement
    batch_index = np.array(sample(range(Ntrain), batch_size))
    Xcondensed = np.array([X[i] for i in batch_index])
    Ycondensed = np.array([y[i] for i in batch_index])
    A = np.empty(num_layer - 1, dtype=object)
    cache = np.empty(num_layer - 1, dtype=object)
    dbeta = np.empty(num_layer - 1, dtype=object)
    dgamma = np.empty(num_layer - 1, dtype=object)
    u = np.empty(num_layer - 2, dtype=object)
    # Forward pass, storing layer inputs and batchnorm caches
    a = Xcondensed
    for l in range(num_layer - 1):
        A[l] = a
        a = np.insert(a, 0, 1, axis=1)
        z = np.dot(a, W[l])
        if batchnorm == 1:
            out, cache[l] = batchnorm_forward(z, beta[l], gamma[l], eps)
        else:
            out = z
        if l == num_layer - 2:
            a = softmax(out)
        else:
            a = g(out)  # a2
            u[l] = (np.random.rand(*a.shape) < dropout) / dropout
            a = np.multiply(a, u[l])  # a1
    # Backward pass: softmax + cross-entropy gives dout = a - y at the output
    dout = a - Ycondensed
    if batchnorm == 0:
        for l in range(num_layer - 1):
            dbeta[l] = np.zeros(num_node[l + 1])
        for l in range(num_layer - 1):
            dgamma[l] = np.zeros(num_node[l + 1])
    if batchnorm == 1:
        dz, dgamma[num_layer - 2], dbeta[num_layer - 2] = batchnorm_backward(dout, cache[num_layer - 2])
    else:
        dz = dout
    delta[num_layer - 2] = delta[num_layer - 2] + np.dot(np.transpose(np.insert(A[num_layer - 2], 0, 1, axis=1)), dz)
    for l in range(num_layer - 2):
        index = num_layer - 2 - l
        # Propagate through the weights, dropping the bias row of W[index]
        da1 = np.dot(dz, np.transpose(np.array([W[index][k + 1] for k in range(num_node[index])])))
        dout = da1 * (0.99 * (A[index] > 0) + 0.01) * u[index - 1]  # da2 = da1 * u[index-1]
        if batchnorm == 1:
            dz, dgamma[index - 1], dbeta[index - 1] = batchnorm_backward(dout, cache[index - 1])
        else:
            dz = dout
        delta[index - 1] = delta[index - 1] + np.dot(np.transpose(np.insert(A[index - 1], 0, 1, axis=1)), dz)
    # Average over the batch and add the regularization term (not on biases)
    D = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        D[l] = np.zeros((num_node[l] + 1, num_node[l + 1]))
    for l in range(num_layer - 1):
        for i in range(num_node[l] + 1):
            for j in range(num_node[l + 1]):
                if i == 0:
                    D[l][i][j] = 1 / batch_size * delta[l][i][j]
                else:
                    D[l][i][j] = 1 / batch_size * delta[l][i][j] + Lambda / Ntrain * W[l][i][j]
        dbeta[l] = dbeta[l] / batch_size
        dgamma[l] = dgamma[l] / batch_size
    return D, dgamma, dbeta

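# The dropout above is "inverted" dropout: the mask u[l] is scaled by
# 1/dropout at train time, so the test-time forward pass h() needs no
# rescaling. The factor 0.99 * (A[index] > 0) + 0.01 is the Leaky ReLU
# derivative, matching g() above.
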
# Neural network - this is where the learning/adjusting of weights occurs
# W is the weights
# learn0 is the initial learning rate; decayrate controls the 1/(1 + k*decayrate) decay
# iterations is the number of iterations we pass over the training set
# Lambda is the regularization parameter
# dropout is the keep probability; batchnorm toggles batch normalization; mu is the momentum coefficient
def NeuralNetwork(y, X, learn0, decayrate, iterations, Lambda, eps, W, beta, gamma, dropout, batchnorm, mu):

    jcost = []
    trainaccuracy = []
    testaccuracy = []
    x = []
    v = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        v[l] = np.zeros((num_node[l] + 1, num_node[l + 1]))
    for k in range(iterations):

        if k % 1 == 0:  # evaluate on every iteration
            x.append(k)
            jcost.append(J(y, W, X, Lambda, beta, gamma, eps, batchnorm))
            print("Cost: ", end='')
            print(jcost[int(k / 1)])

            count = 0
            H = h(W, inputpix, beta, gamma, eps, batchnorm)
            for i in range(Ntrain):
                for j in range(num_output_node):
                    if H[i][j] == np.amax(H[i]) and output[i][j] == 1:
                        count = count + 1
            trainaccuracy.append(count / Ntrain)
            print("Train accuracy: ", end='')
            print(trainaccuracy[int(k / 1)])

            count = 0
            H = h(W, inputtestpix, beta, gamma, eps, batchnorm)
            for i in range(Ntest):
                for j in range(num_output_node):
                    if H[i][j] == np.amax(H[i]) and outputtest[i][j] == 1:
                        count = count + 1
            testaccuracy.append(count / Ntest)
            print("Test accuracy: ", end='')
            print(testaccuracy[int(k / 1)])

        for l in range(num_layer - 1):
            W[l] = W[l] + mu * v[l]  # momentum lookahead step
        D, dgamma, dbeta = BackPropagation(y, W, X, Lambda, beta, gamma, eps, dropout, batchnorm)
        learn = 1 / (1 + k * decayrate) * learn0
        for l in range(num_layer - 1):
            v[l] = mu * v[l] - learn * D[l]
            W[l] = W[l] - learn * D[l]
            if batchnorm == 1:
                beta[l] = beta[l] - learn * dbeta[l]
                gamma[l] = gamma[l] - learn * dgamma[l]

    return W, beta, gamma

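# The lookahead W[l] += mu * v[l] before the gradient evaluation, combined
# with v = mu*v - learn*D and W -= learn*D, is a Nesterov-style momentum
# update with coefficient mu.
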
"""
# Legacy experiments, left disabled: these calls expect an earlier
# NeuralNetwork that returned (x, jcost, trainaccuracy, testaccuracy) and
# took fewer arguments, so they do not match the current signature.
W0 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    W0[l] = np.random.randn(num_node[l] + 1, num_node[l + 1]) / math.sqrt(num_node[l] / 2)
beta0 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    beta0[l] = np.random.rand(num_node[l + 1])
gamma0 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    gamma0[l] = np.random.rand(num_node[l + 1])

W1 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    W1[l] = np.random.randn(num_node[l] + 1, num_node[l + 1]) / math.sqrt(num_node[l] / 2)
beta1 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    beta1[l] = np.random.rand(num_node[l + 1])
gamma1 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    gamma1[l] = np.random.rand(num_node[l + 1])

W2 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    W2[l] = np.random.randn(num_node[l] + 1, num_node[l + 1]) / 10  # / math.sqrt(num_node[l] / 2)
beta2 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    beta2[l] = np.random.rand(num_node[l + 1])
gamma2 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    gamma2[l] = np.random.rand(num_node[l + 1])

W3 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    W3[l] = np.random.randn(num_node[l] + 1, num_node[l + 1]) / 10  # / math.sqrt(num_node[l] / 2)
beta3 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    beta3[l] = np.random.rand(num_node[l + 1])
gamma3 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    gamma3[l] = np.random.rand(num_node[l + 1])

W4 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    W4[l] = np.random.randn(num_node[l] + 1, num_node[l + 1]) / math.sqrt(num_node[l] / 2)
beta4 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    beta4[l] = np.random.rand(num_node[l + 1])
gamma4 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    gamma4[l] = np.random.rand(num_node[l + 1])

W5 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    W5[l] = np.random.randn(num_node[l] + 1, num_node[l + 1]) / math.sqrt(num_node[l] / 2)
beta5 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    beta5[l] = np.random.rand(num_node[l + 1])
gamma5 = np.empty(num_layer - 1, dtype=object)
for l in range(num_layer - 1):
    gamma5[l] = np.random.rand(num_node[l + 1])

for l in range(num_layer - 1):
    for i in range(num_node[l] + 1):
        for j in range(num_node[l + 1]):
            W1[l][i][j] = W0[l][i][j]
            #W2[l][i][j] = W0[l][i][j]
            #W3[l][i][j] = W0[l][i][j]
            W3[l][i][j] = W2[l][i][j]
            W4[l][i][j] = W0[l][i][j]
            W5[l][i][j] = W0[l][i][j]
for l in range(num_layer - 1):
    for j in range(num_node[l + 1]):
        beta1[l][j] = beta0[l][j]
        beta2[l][j] = beta0[l][j]
        beta3[l][j] = beta0[l][j]
        beta4[l][j] = beta0[l][j]
        beta5[l][j] = beta0[l][j]
for l in range(num_layer - 1):
    for j in range(num_node[l + 1]):
        gamma1[l][j] = gamma0[l][j]
        gamma2[l][j] = gamma0[l][j]
        gamma3[l][j] = gamma0[l][j]
        gamma4[l][j] = gamma0[l][j]
        gamma5[l][j] = gamma0[l][j]

x0, jcost0, trainaccuracy0, testaccuracy0 = NeuralNetwork(output, inputpix, 10, 100000, 0, 0.00001, W0, beta0, gamma0, 0.5, 1, 0.9)
x1, jcost1, trainaccuracy1, testaccuracy1 = NeuralNetwork(output, inputpix, 2.5, 1000, 0, 0.00001, W1, beta1, gamma1, 1, 1)
x2, jcost2, trainaccuracy2, testaccuracy2 = NeuralNetwork(output, inputpix, 1, 500, 0, 0.00001, W2, beta2, gamma2, 1, 0)
x3, jcost3, trainaccuracy3, testaccuracy3 = NeuralNetwork(output, inputpix, 2.5, 500, 0, 0.00001, W3, beta3, gamma3, 1, 1)
plt.plot(x0, jcost0, 'g-', x1, jcost1, 'r-', x2, jcost2, 'b-', x3, jcost3, 'y-')
plt.show()
plt.plot(x0, trainaccuracy0, 'g-', x1, trainaccuracy1, 'r-', x2, trainaccuracy2, 'b-', x3, trainaccuracy3, 'y-')
plt.show()
plt.plot(x0, testaccuracy0, 'g-', x1, testaccuracy1, 'r-', x2, testaccuracy2, 'b-', x3, testaccuracy3, 'y-')
plt.show()
x4, jcost4, trainaccuracy4, testaccuracy4 = NeuralNetwork(output, inputpix, 0.25, 150, 0, 0.00001, W4, beta4, gamma4, 1)
plt.plot(x0, jcost0, 'g-', x1, jcost1, 'r-', x2, jcost2, 'b-', x3, jcost3, 'y-', x4, jcost4, 'k-')
plt.show()
plt.plot(x0, trainaccuracy0, 'g-', x1, trainaccuracy1, 'r-', x2, trainaccuracy2, 'b-', x3, trainaccuracy3, 'y-', x4, trainaccuracy4, 'k-')
plt.show()
plt.plot(x0, testaccuracy0, 'g-', x1, testaccuracy1, 'r-', x2, testaccuracy2, 'b-', x3, testaccuracy3, 'y-', x4, testaccuracy4, 'k-')
plt.show()
plt.plot(x4, jcost4, 'g-')
plt.show()
plt.plot(x4, testaccuracy4, 'g-', x4, trainaccuracy4, 'r-')
plt.show()
x5, jcost5, trainaccuracy5, testaccuracy5 = NeuralNetwork(output, inputpix, 0.75, 150, 0, 0.00001, W5, beta5, gamma5, 1)
plt.plot(x0, jcost0, 'g-', x1, jcost1, 'r-', x2, jcost2, 'b-', x3, jcost3, 'y-', x4, jcost4, 'k-', x5, jcost5, 'm-')
plt.show()
plt.plot(x0, trainaccuracy0, 'g-', x1, trainaccuracy1, 'r-', x2, trainaccuracy2, 'b-', x3, trainaccuracy3, 'y-', x4, trainaccuracy4, 'k-', x5, trainaccuracy5, 'm-')
plt.show()
plt.plot(x0, testaccuracy0, 'g-', x1, testaccuracy1, 'r-', x2, testaccuracy2, 'b-', x3, testaccuracy3, 'y-', x4, testaccuracy4, 'k-', x5, testaccuracy5, 'm-')
plt.show()
plt.plot(x5, jcost5, 'g-')
plt.show()
plt.plot(x5, testaccuracy5, 'g-', x5, trainaccuracy5, 'r-')
plt.show()
"""

finalweights = np.empty(5, dtype=object)
finalbeta = np.empty(5, dtype=object)
finalgamma = np.empty(5, dtype=object)

# Train an ensemble of 5 networks from independent random initializations
for e in range(5):
    W = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        W[l] = np.random.randn(num_node[l] + 1, num_node[l + 1])  # / math.sqrt(num_node[l] / 2)
    beta = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        beta[l] = np.random.rand(num_node[l + 1])
    gamma = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        gamma[l] = np.random.rand(num_node[l + 1])
    finalweights[e], finalbeta[e], finalgamma[e] = NeuralNetwork(output, inputpix, 5, 0.003, 5, 1, 0.0001, W, beta, gamma, 0.75, 1, 0.9)

    for l in range(num_layer - 1):
        with open("finalweights (" + str(e) + " " + str(l) + ").txt", 'w') as f:
            writer = csv.writer(f, delimiter=' ')
            writer.writerows(finalweights[e][l])
        with open("finalbeta (" + str(e) + " " + str(l) + ").txt", 'w') as f:
            writer = csv.writer(f, delimiter=' ')
            writer.writerow(finalbeta[e][l])  # beta[l] is 1-D, so write a single row
        with open("finalgamma (" + str(e) + " " + str(l) + ").txt", 'w') as f:
            writer = csv.writer(f, delimiter=' ')
            writer.writerow(finalgamma[e][l])  # gamma[l] is 1-D, so write a single row

    print("Cost: ", end='')
    print(e, end=' ')
    print(J(output, finalweights[e], inputpix, 1, finalbeta[e], finalgamma[e], 0.0001, 1))  # batchnorm=1, as in training

    count = 0
    H = h(finalweights[e], inputpix, finalbeta[e], finalgamma[e], 0.0001, 1)  # batchnorm=1
    for i in range(Ntrain):
        for j in range(num_output_node):
            if H[i][j] == np.amax(H[i]) and output[i][j] == 1:
                count = count + 1
    print("Train accuracy: ", end='')
    print(e, end=' ')
    print(count / Ntrain)

    count = 0
    H = h(finalweights[e], inputtestpix, finalbeta[e], finalgamma[e], 0.0001, 1)
    for i in range(Ntest):
        for j in range(num_output_node):
            if H[i][j] == np.amax(H[i]) and outputtest[i][j] == 1:
                count = count + 1
    print("Test accuracy: ", end='')
    print(e, end=' ')
    print(count / Ntest)

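# Ensemble prediction: average the softmax outputs of the five networks and
# take the argmax of the averaged probabilities.
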
Htrain = h(finalweights[4], inputpix, finalbeta[4], finalgamma[4], 0.0001, 1)
for e in range(4):
    Htrain = Htrain + h(finalweights[e], inputpix, finalbeta[e], finalgamma[e], 0.0001, 1)
Htrain = Htrain / 5

Htest = h(finalweights[4], inputtestpix, finalbeta[4], finalgamma[4], 0.0001, 1)
for e in range(4):
    Htest = Htest + h(finalweights[e], inputtestpix, finalbeta[e], finalgamma[e], 0.0001, 1)
Htest = Htest / 5

count = 0
for i in range(Ntrain):
    for j in range(num_output_node):
        if Htrain[i][j] == np.amax(Htrain[i]) and output[i][j] == 1:
            count = count + 1
print("Train accuracy final: ", end='')
print(count / Ntrain)

count = 0
for i in range(Ntest):
    for j in range(num_output_node):
        if Htest[i][j] == np.amax(Htest[i]) and outputtest[i][j] == 1:
            count = count + 1
print("Test accuracy final: ", end='')
print(count / Ntest)