# Record Training Accuracy: 0.9976363636363637
# Record Test Accuracy: 0.9853

import numpy as np
import math
# Requires TensorFlow 1.x; this tutorial module was removed in TF 2.
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
from random import sample
import csv
# Layer sizes: 784 input pixels (28x28), four hidden layers of 200, 10 output classes
num_node = [784, 200, 200, 200, 200, 10]
num_layer = len(num_node)
num_output_node = num_node[num_layer - 1]

Ntrain = 55000
Ntest = 10000
batch_size = 100
- """
- # Sigmoid Function
- # np.multiply(A[index], 1-A[index])
- def g(X):
- return 1/(1 + np.exp(-X))
- """
- """
- # Tanh Function
- # np.multiply(1+A[index], 1-A[index])
- def g(X):
- tanhX = np.exp(2 * X)
- return np.divide(tanhX - 1, tanhX + 1)
- """
- """
- # ReLU
- # 1 * (A[index] > 0)
- def g(X):
- return np.maximum(X, 0)
- """
- # Leaky ReLU
- # 0.99 * (A[index] > 0) + 0.01
- def g(X):
- return np.maximum(X, 0.01 * X)
# Softmax (row-wise)
def softmax(X):
    N, d = X.shape
    return np.array([np.exp(X[i]) / np.sum(np.exp(X[i])) for i in range(N)])
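
# Quick sanity checks on tiny arrays (editorial addition; cheap enough to run
# inline). Leaky ReLU passes positives through and scales negatives by 0.01;
# softmax rows should be probability distributions.
_check = np.array([[-1.0, 0.0, 2.0]])
assert np.allclose(g(_check), [[-0.01, 0.0, 2.0]])
assert np.allclose(np.sum(softmax(_check), axis=1), 1.0)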
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Training data, read from MNIST
inputpix = np.array(mnist.train.images[:Ntrain])
output = np.array(mnist.train.labels[:Ntrain])
inputpix = inputpix - np.mean(inputpix, axis=0)  # zero-center each pixel
np.savetxt('input.txt', inputpix, delimiter=' ')
np.savetxt('output.txt', output, delimiter=' ')
# Test data
inputtestpix = np.array(mnist.test.images[:Ntest])
outputtest = np.array(mnist.test.labels[:Ntest])
inputtestpix = inputtestpix - np.mean(inputtestpix, axis=0)
np.savetxt('inputtest.txt', inputtestpix, delimiter=' ')
np.savetxt('outputtest.txt', outputtest, delimiter=' ')
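
# Note (editorial): the test set is centered with its own pixel means rather
# than the training-set means. Strictly the training means should be reused,
# but this is left as-is since the recorded accuracies were obtained this way.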
# Batch Normalization: https://kratzert.github.io/2016/02/12/understanding-the-gradient-flow-through-the-batch-normalization-layer.html
def batchnorm_forward(z, beta, gamma, eps):
    mu = np.mean(z, axis=0)
    xmu = z - mu
    sq = xmu ** 2
    var = np.mean(sq, axis=0)
    sqrtvar = np.sqrt(var + eps)
    ivar = 1 / sqrtvar
    xhat = xmu * ivar
    gammax = gamma * xhat
    out = gammax + beta
    cache = (xhat, gamma, xmu, ivar, sqrtvar, var, eps)
    return out, cache
def batchnorm_backward(dout, cache):
    xhat, gamma, xmu, ivar, sqrtvar, var, eps = cache
    N, D = dout.shape
    dbeta = np.sum(dout, axis=0)
    dgammax = dout
    dgamma = np.sum(dgammax * xhat, axis=0)
    dxhat = dgammax * gamma
    divar = np.sum(dxhat * xmu, axis=0)
    dxmu1 = dxhat * ivar
    dsqrtvar = -1 / (sqrtvar ** 2) * divar
    dvar = 0.5 * 1 / np.sqrt(var + eps) * dsqrtvar
    dsq = 1 / N * np.ones((N, D)) * dvar
    dxmu2 = 2 * xmu * dsq
    dx1 = dxmu1 + dxmu2
    dmu = -1 * np.sum(dxmu1 + dxmu2, axis=0)
    dx2 = 1 / N * np.ones((N, D)) * dmu
    dx = dx1 + dx2
    return dx, dgamma, dbeta
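
# Minimal finite-difference check for batchnorm_backward (editorial addition,
# not called during training). Treating np.sum(out * dout) as a scalar loss,
# its gradient w.r.t. z should match the analytic dx; call by hand to verify,
# e.g. batchnorm_gradcheck() is expected to return something near 1e-8 or less.
def batchnorm_gradcheck(n=4, d=3, eps=0.0001, step=1e-5):
    rng = np.random.RandomState(0)
    z = rng.randn(n, d)
    beta = rng.rand(d)
    gamma = rng.rand(d)
    dout = rng.randn(n, d)
    _, cache = batchnorm_forward(z, beta, gamma, eps)
    dx, _, _ = batchnorm_backward(dout, cache)
    numeric = np.zeros((n, d))
    for i in range(n):
        for j in range(d):
            zp = z.copy(); zp[i][j] += step
            zm = z.copy(); zm[i][j] -= step
            outp, _ = batchnorm_forward(zp, beta, gamma, eps)
            outm, _ = batchnorm_forward(zm, beta, gamma, eps)
            numeric[i][j] = np.sum((outp - outm) * dout) / (2 * step)
    return np.max(np.abs(dx - numeric))  # max absolute error vs. analytic dx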
# Forward propagation
def h(W, X, beta, gamma, eps, batchnorm):
    a = X
    for l in range(num_layer - 1):
        a = np.insert(a, 0, 1, axis=1)  # prepend bias unit
        z = np.dot(a, W[l])
        if batchnorm == 1:
            out, cache = batchnorm_forward(z, beta[l], gamma[l], eps)
        else:
            out = z
        if l == num_layer - 2:
            a = softmax(out)
        else:
            a = g(out)
    return a
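
# Shape note: h maps an (N, 784) batch to an (N, 10) matrix of class
# probabilities whose rows sum to 1. Illustrative call (once weights exist):
#   probs = h(W, inputpix[:5], beta, gamma, 0.0001, 1)  # probs.shape == (5, 10)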
# Cost Function: averaged cross-entropy plus L2 regularization
def J(y, W, X, Lambda, beta, gamma, eps, batchnorm):
    H = h(W, X, beta, gamma, eps, batchnorm)
    # 1e-12 guards against log(0); small enough not to affect the cost
    cost = -np.sum(np.asarray(y) * np.log(H + 1e-12))
    regularization = 0
    for l in range(num_layer - 1):
        regularization = regularization + np.sum(W[l][1:] ** 2)  # skip bias row
    return 1 / Ntrain * cost + Lambda / (2 * Ntrain) * regularization
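
# For reference, J computes the standard regularized cross-entropy:
#   J = -(1/Ntrain) * sum_i sum_k y[i][k] * log(H[i][k])
#       + (Lambda/(2*Ntrain)) * sum of squared non-bias weights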
# Backpropagation - confirmed to be correct
# Algorithm based on https://www.coursera.org/learn/machine-learning/lecture/1z9WW/backpropagation-algorithm
# Returns D, the value of the gradient
def BackPropagation(y, W, X, Lambda, beta, gamma, eps, dropout, batchnorm):
    delta = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        delta[l] = np.zeros((num_node[l] + 1, num_node[l + 1]))
    # Sample one mini-batch without replacement
    batch_index = np.array(sample(range(Ntrain), batch_size))
    Xcondensed = np.array([X[i] for i in batch_index])
    Ycondensed = np.array([y[i] for i in batch_index])
    A = np.empty(num_layer - 1, dtype=object)
    cache = np.empty(num_layer - 1, dtype=object)
    dbeta = np.empty(num_layer - 1, dtype=object)
    dgamma = np.empty(num_layer - 1, dtype=object)
    u = np.empty(num_layer - 2, dtype=object)  # inverted-dropout masks
    # Forward pass, storing activations and batchnorm caches
    a = Xcondensed
    for l in range(num_layer - 1):
        A[l] = a
        a = np.insert(a, 0, 1, axis=1)
        z = np.dot(a, W[l])
        if batchnorm == 1:
            out, cache[l] = batchnorm_forward(z, beta[l], gamma[l], eps)
        else:
            out = z
        if l == num_layer - 2:
            a = softmax(out)
        else:
            a = g(out)
            u[l] = (np.random.rand(*a.shape) < dropout) / dropout
            a = np.multiply(a, u[l])
    # Output-layer error (softmax with cross-entropy)
    dout = a - Ycondensed
    if batchnorm == 0:
        for l in range(num_layer - 1):
            dbeta[l] = np.zeros(num_node[l + 1])
            dgamma[l] = np.zeros(num_node[l + 1])
    if batchnorm == 1:
        dz, dgamma[num_layer - 2], dbeta[num_layer - 2] = batchnorm_backward(dout, cache[num_layer - 2])
    else:
        dz = dout
    delta[num_layer - 2] = delta[num_layer - 2] + np.dot(np.transpose(np.insert(A[num_layer - 2], 0, 1, axis=1)), dz)
    # Walk the hidden layers backwards
    for l in range(num_layer - 2):
        index = num_layer - 2 - l
        da1 = np.dot(dz, np.transpose(np.array([W[index][k + 1] for k in range(num_node[index])])))
        # 0.99 * (A > 0) + 0.01 is the Leaky ReLU derivative; u re-applies the dropout mask
        dout = da1 * (0.99 * (A[index] > 0) + 0.01) * u[index - 1]
        if batchnorm == 1:
            dz, dgamma[index - 1], dbeta[index - 1] = batchnorm_backward(dout, cache[index - 1])
        else:
            dz = dout
        delta[index - 1] = delta[index - 1] + np.dot(np.transpose(np.insert(A[index - 1], 0, 1, axis=1)), dz)
    # Average over the batch; the L2 term applies to non-bias rows only
    D = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        D[l] = delta[l] / batch_size
        D[l][1:] = D[l][1:] + Lambda / Ntrain * W[l][1:]
        dbeta[l] = dbeta[l] / batch_size
        dgamma[l] = dgamma[l] / batch_size
    return D, dgamma, dbeta
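
# Caveat: the derivative 0.99 * (A[index] > 0) + 0.01 above is hard-coded for
# Leaky ReLU. Swapping g for one of the commented-out activations also means
# substituting the matching derivative noted beside each definition.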
# Neural network - this is where the learning/adjusting of weights occurs
# W is the weights
# learn0 is the initial learning rate, decayed as learn0 / (1 + k * decayrate)
# iterations is the number of passes over the training set
# Lambda is the regularization parameter
# mu is the momentum coefficient
def NeuralNetwork(y, X, learn0, decayrate, iterations, Lambda, eps, W, beta, gamma, dropout, batchnorm, mu):
    jcost = []
    trainaccuracy = []
    testaccuracy = []
    x = []
    v = np.empty(num_layer - 1, dtype=object)  # momentum velocities
    for l in range(num_layer - 1):
        v[l] = np.zeros((num_node[l] + 1, num_node[l + 1]))
    for k in range(iterations):
        # Log cost and accuracies every iteration
        x.append(k)
        jcost.append(J(y, W, X, Lambda, beta, gamma, eps, batchnorm))
        print("Cost: ", end='')
        print(jcost[-1])
        count = 0
        H = h(W, inputpix, beta, gamma, eps, batchnorm)
        for i in range(Ntrain):
            for j in range(num_output_node):
                if H[i][j] == np.amax(H[i]) and output[i][j] == 1:
                    count = count + 1
        trainaccuracy.append(count / Ntrain)
        print("Train accuracy: ", end='')
        print(trainaccuracy[-1])
        count = 0
        H = h(W, inputtestpix, beta, gamma, eps, batchnorm)
        for i in range(Ntest):
            for j in range(num_output_node):
                if H[i][j] == np.amax(H[i]) and outputtest[i][j] == 1:
                    count = count + 1
        testaccuracy.append(count / Ntest)
        print("Test accuracy: ", end='')
        print(testaccuracy[-1])
        # Nesterov-style lookahead: step along the velocity before the gradient
        for l in range(num_layer - 1):
            W[l] = W[l] + mu * v[l]
        D, dgamma, dbeta = BackPropagation(y, W, X, Lambda, beta, gamma, eps, dropout, batchnorm)
        learn = 1 / (1 + k * decayrate) * learn0
        for l in range(num_layer - 1):
            v[l] = mu * v[l] - learn * D[l]
            W[l] = W[l] - learn * D[l]
            if batchnorm == 1:
                beta[l] = beta[l] - learn * dbeta[l]
                gamma[l] = gamma[l] - learn * dgamma[l]
    return W, beta, gamma
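
# Minimal usage sketch (illustrative hyperparameters; the actual ensemble runs
# are at the bottom of this file):
#   Wout, betaout, gammaout = NeuralNetwork(output, inputpix, 5, 0.003, 5,
#                                           1, 0.0001, W, beta, gamma,
#                                           0.75, 1, 0.9)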
- """
- W0 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- W0[l] = np.random.randn(num_node[l]+1,num_node[l+1]) / math.sqrt(num_node[l] / 2)
- beta0 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- beta0[l] = np.random.rand(num_node[l+1])
- gamma0 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- gamma0[l] = np.random.rand(num_node[l+1])
- W1 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- W1[l] = np.random.randn(num_node[l]+1,num_node[l+1]) / math.sqrt(num_node[l] / 2)
- beta1 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- beta1[l] = np.random.rand(num_node[l+1])
- gamma1 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- gamma1[l] = np.random.rand(num_node[l+1])
- W2 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- W2[l] = np.random.randn(num_node[l]+1,num_node[l+1]) / 10 # / math.sqrt(num_node[l] / 2)
- beta2 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- beta2[l] = np.random.rand(num_node[l+1])
- gamma2 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- gamma2[l] = np.random.rand(num_node[l+1])
- W3 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- W3[l] = np.random.randn(num_node[l]+1,num_node[l+1]) / 10 # / math.sqrt(num_node[l] / 2)
- beta3 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- beta3[l] = np.random.rand(num_node[l+1])
- gamma3 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- gamma3[l] = np.random.rand(num_node[l+1])
- W4 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- W4[l] = np.random.randn(num_node[l]+1,num_node[l+1]) / math.sqrt(num_node[l] / 2)
- beta4 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- beta4[l] = np.random.rand(num_node[l+1])
- gamma4 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- gamma4[l] = np.random.rand(num_node[l+1])
- W5 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- W5[l] = np.random.randn(num_node[l]+1,num_node[l+1]) / math.sqrt(num_node[l] / 2)
- beta5 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- beta5[l] = np.random.rand(num_node[l+1])
- gamma5 = np.empty(num_layer-1, dtype = object)
- for l in range(num_layer - 1):
- gamma5[l] = np.random.rand(num_node[l+1])
- for l in range(num_layer-1):
- for i in range(num_node[l]+1):
- for j in range(num_node[l+1]):
- W1[l][i][j] = W0[l][i][j]
- #W2[l][i][j] = W0[l][i][j]
- #W3[l][i][j] = W0[l][i][j]
- W3[l][i][j] = W2[l][i][j]
- W4[l][i][j] = W0[l][i][j]
- W5[l][i][j] = W0[l][i][j]
- for l in range(num_layer-1):
- for j in range(num_node[l+1]):
- beta1[l][j] = beta0[l][j]
- beta2[l][j] = beta0[l][j]
- beta3[l][j] = beta0[l][j]
- beta4[l][j] = beta0[l][j]
- beta5[l][j] = beta0[l][j]
- for l in range(num_layer-1):
- for j in range(num_node[l+1]):
- gamma1[l][j] = gamma0[l][j]
- gamma2[l][j] = gamma0[l][j]
- gamma3[l][j] = gamma0[l][j]
- gamma4[l][j] = gamma0[l][j]
- gamma5[l][j] = gamma0[l][j]
- x0, jcost0, trainaccuracy0, testaccuracy0 = NeuralNetwork(output, inputpix, 10, 100000, 0, 0.00001, W0, beta0, gamma0, 0.5, 1, 0.9)
- x1, jcost1, trainaccuracy1, testaccuracy1 = NeuralNetwork(output, inputpix, 2.5, 1000, 0, 0.00001, W1, beta1, gamma1, 1, 1)
- x2, jcost2, trainaccuracy2, testaccuracy2 = NeuralNetwork(output, inputpix, 1, 500, 0, 0.00001, W2, beta2, gamma2, 1, 0)
- x3, jcost3, trainaccuracy3, testaccuracy3 = NeuralNetwork(output, inputpix, 2.5, 500, 0, 0.00001, W3, beta3, gamma3, 1, 1)
- plt.plot(x0,jcost0,'g-', x1, jcost1, 'r-', x2,jcost2,'b-', x3, jcost3, 'y-')
- plt.show()
- plt.plot(x0,trainaccuracy0,'g-', x1, trainaccuracy1, 'r-', x2,trainaccuracy2,'b-', x3, trainaccuracy3, 'y-')
- plt.show()
- plt.plot(x0,testaccuracy0,'g-', x1, testaccuracy1, 'r-', x2,testaccuracy2,'b-', x3, testaccuracy3, 'y-')
- plt.show()
- x4, jcost4, trainaccuracy4, testaccuracy4 = NeuralNetwork(output, inputpix, 0.25, 150, 0, 0.00001, W4, beta4, gamma4, 1)
- plt.plot(x0,jcost0,'g-', x1, jcost1, 'r-', x2, jcost2, 'b-', x3, jcost3, 'y-', x4, jcost4, 'k-')
- plt.show()
- plt.plot(x0,trainaccuracy0,'g-', x1, trainaccuracy1, 'r-', x2, trainaccuracy2, 'b-', x3, trainaccuracy3, 'y-', x4, trainaccuracy4, 'k-')
- plt.show()
- plt.plot(x0,testaccuracy0,'g-', x1, testaccuracy1, 'r-', x2, testaccuracy2, 'b-', x3, testaccuracy3, 'y-', x4, testaccuracy4, 'k-')
- plt.show()
- plt.plot(x4, jcost4, 'g-')
- plt.show()
- plt.plot(x4, testaccuracy4, 'g-', x4, trainaccuracy4, 'r-')
- plt.show()
- x5, jcost5, trainaccuracy5, testaccuracy5 = NeuralNetwork(output, inputpix, 0.75, 150, 0, 0.00001, W5, beta5, gamma5, 1)
- plt.plot(x0,jcost0,'g-', x1, jcost1, 'r-', x2, jcost2, 'b-', x3, jcost3, 'y-', x4, jcost4, 'k-', x5, jcost5, 'm-')
- plt.show()
- plt.plot(x0,trainaccuracy0,'g-', x1, trainaccuracy1, 'r-', x2, trainaccuracy2, 'b-', x3, trainaccuracy3, 'y-', x4, trainaccuracy4, 'k-', x5, trainaccuracy5, 'm-')
- plt.show()
- plt.plot(x0,testaccuracy0,'g-', x1, testaccuracy1, 'r-', x2, testaccuracy2, 'b-', x3, testaccuracy3, 'y-', x4, testaccuracy4, 'k-', x5, testaccuracy5, 'm-')
- plt.show()
- plt.plot(x5, jcost5, 'g-')
- plt.show()
- plt.plot(x5, testaccuracy5, 'g-', x5, trainaccuracy5, 'r-')
- plt.show()
- """
# Train an ensemble of 5 networks from independent random initializations
finalweights = np.empty(5, dtype=object)
finalbeta = np.empty(5, dtype=object)
finalgamma = np.empty(5, dtype=object)
for e in range(5):
    W = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        W[l] = np.random.randn(num_node[l] + 1, num_node[l + 1])  # / math.sqrt(num_node[l] / 2)
    beta = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        beta[l] = np.random.rand(num_node[l + 1])
    gamma = np.empty(num_layer - 1, dtype=object)
    for l in range(num_layer - 1):
        gamma[l] = np.random.rand(num_node[l + 1])
    finalweights[e], finalbeta[e], finalgamma[e] = NeuralNetwork(output, inputpix, 5, 0.003, 5, 1, 0.0001, W, beta, gamma, 0.75, 1, 0.9)
    # Save the learned parameters for each layer
    for l in range(num_layer - 1):
        with open("finalweights (" + str(e) + " " + str(l) + ").txt", 'w', newline='') as f:
            writer = csv.writer(f, delimiter=' ')
            writer.writerows(finalweights[e][l])
        with open("finalbeta (" + str(e) + " " + str(l) + ").txt", 'w', newline='') as f:
            writer = csv.writer(f, delimiter=' ')
            writer.writerow(finalbeta[e][l])  # 1-D array: write a single row
        with open("finalgamma (" + str(e) + " " + str(l) + ").txt", 'w', newline='') as f:
            writer = csv.writer(f, delimiter=' ')
            writer.writerow(finalgamma[e][l])  # 1-D array: write a single row
    print("Cost: ", end='')
    print(e, end=' ')
    print(J(output, finalweights[e], inputpix, 1, finalbeta[e], finalgamma[e], 0.0001, 1))  # batchnorm=1, matching training
    count = 0
    H = h(finalweights[e], inputpix, finalbeta[e], finalgamma[e], 0.0001, 1)
    for i in range(Ntrain):
        for j in range(num_output_node):
            if H[i][j] == np.amax(H[i]) and output[i][j] == 1:
                count = count + 1
    print("Train accuracy: ", end='')
    print(e, end=' ')
    print(count / Ntrain)
    count = 0
    H = h(finalweights[e], inputtestpix, finalbeta[e], finalgamma[e], 0.0001, 1)
    for i in range(Ntest):
        for j in range(num_output_node):
            if H[i][j] == np.amax(H[i]) and outputtest[i][j] == 1:
                count = count + 1
    print("Test accuracy: ", end='')
    print(e, end=' ')
    print(count / Ntest)
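
# Ensemble: average the five networks' softmax outputs and score the averaged
# probabilities with the same max-vs-label rule used above.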
Htrain = h(finalweights[4], inputpix, finalbeta[4], finalgamma[4], 0.0001, 1)
for e in range(4):
    Htrain = Htrain + h(finalweights[e], inputpix, finalbeta[e], finalgamma[e], 0.0001, 1)
Htrain = Htrain / 5
Htest = h(finalweights[4], inputtestpix, finalbeta[4], finalgamma[4], 0.0001, 1)
for e in range(4):
    Htest = Htest + h(finalweights[e], inputtestpix, finalbeta[e], finalgamma[e], 0.0001, 1)
Htest = Htest / 5
count = 0
for i in range(Ntrain):
    for j in range(num_output_node):
        if Htrain[i][j] == np.amax(Htrain[i]) and output[i][j] == 1:
            count = count + 1
print("Train accuracy final: ", end='')
print(count / Ntrain)
count = 0
for i in range(Ntest):
    for j in range(num_output_node):
        if Htest[i][j] == np.amax(Htest[i]) and outputtest[i][j] == 1:
            count = count + 1
print("Test accuracy final: ", end='')
print(count / Ntest)