# this implementation was given as assignment 3 of the course
# B55.2 WT Ausgewählte Kapitel sozialer Webtechnologien at HTW Berlin

# third party
import numpy as np
import matplotlib.pyplot as plt

# internal
from deep_teaching_commons.data.fundamentals.mnist import Mnist

# create mnist loader from deep_teaching_commons
mnist_loader = Mnist(data_dir='data')

# load all data; labels are one-hot encoded, images are flattened and pixel values are scaled to [0, 1]
train_images, train_labels, test_images, test_labels = mnist_loader.get_all_data(
    one_hot_enc=True, normalized=True)

# shuffle training data
shuffle_index = np.random.permutation(60000)
train_images, train_labels = train_images[shuffle_index], train_labels[shuffle_index]
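
# (illustrative) a quick sanity check on the loaded arrays; with standard MNIST
# one would expect 60000 flattened 784-pixel training images, 10000 test images
# and one-hot labels with 10 columns -- adjust if your loader version differs
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)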
def feed_forward(X, weights):
    """
    Calculates the forward pass of our neural network with ReLU as the activation function of every neuron.

    Args:
        X: input data of our neural network (in our case: our images)
        weights: the learnable parameters of our network

    Returns:
        a list of matrices, one activation matrix per layer of the forward pass
    """
    a = [X]
    for w in weights:
        # the last item of our list is always the latest activation that was calculated,
        # which is why a[-1] is always used
        a.append(np.maximum(a[-1].dot(w), 0))
    return a
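
# (illustrative) a minimal sketch of how feed_forward can be exercised: with random
# weights for a 784-200-100-10 network, the returned list should contain one
# activation matrix per layer, starting with the input itself;
# the _demo_* names exist only for this example
_demo_weights = [np.random.randn(*s) * 0.1 for s in [(784, 200), (200, 100), (100, 10)]]
_demo_activations = feed_forward(train_images[:5], _demo_weights)
print([act.shape for act in _demo_activations])  # expected: [(5, 784), (5, 200), (5, 100), (5, 10)]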
def grads(X, Y, weights):
    """
    Calculates the gradient of our network using an algorithm called backpropagation.

    Args:
        X: input data of our neural network (in our case: our images)
        Y: labels of our input data
        weights: the learnable parameters of our network

    Returns:
        the gradient of our loss function with respect to each weight matrix
    """
    grads = [np.empty_like(w) for w in weights]
    a = feed_forward(X, weights)
    # calculating the gradient, starting at the output layer and moving backwards
    delta = a[-1] - Y
    grads[-1] = a[-2].T.dot(delta)
    for i in range(len(a) - 2, 0, -1):
        delta = (a[i] > 0) * delta.dot(weights[i].T)
        grads[i - 1] = a[i - 1].T.dot(delta)
    return [g / len(X) for g in grads]
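
# (illustrative) one manual SGD step on a small batch, following the update rule
# W_new = W_old - learning_rate * grad(L) that the training loop below uses;
# the _chk_* names and the batch size of 32 exist only for this example
_chk_weights = [np.random.randn(*s) * 0.1 for s in [(784, 200), (200, 100), (100, 10)]]
_chk_grads = grads(train_images[:32], train_labels[:32], _chk_weights)
_chk_weights = [w - 0.1 * g for w, g in zip(_chk_weights, _chk_grads)]
print([g.shape for g in _chk_grads])  # each gradient matches the shape of its weight matrix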
# To test whether our implementation works, we first initialize our neural network with
# 784 input neurons, two hidden layers with 200 and 100 neurons, and 10 output neurons.
# The 784 input neurons stand for the pixels of one image (every image has a resolution of 28x28) and
# the 10 output neurons stand for every possible digit the image could show (every image shows a digit from 0-9).
# We also set up variables for our train and test datasets.
trX, trY, teX, teY = train_images, train_labels, test_images, test_labels
weights = [np.random.randn(*w) * 0.1
           for w in [(784, 200), (200, 100), (100, 10)]]
# After initializing our network we train it and then see how accurately it performs.
# The number of epochs stands for how many times we repeat the training over the whole dataset.
#
# In order to train our network/minimize our loss we use stochastic gradient descent,
# which is the same as gradient descent but uses only a part of the whole data
# - a so-called "mini-batch" - to calculate the gradient in each iteration.
#
# Gradient descent tries to minimize our loss function
# by subtracting the gradient of our loss function from our current weights,
# so we have W_new = W_old - grad(L) * learning_rate
num_epochs, batch_size, learn_rate = 10, 50, 0.1
for i in range(num_epochs):
    for j in range(0, len(trX), batch_size):
        # creating a mini-batch with the size of batch_size
        X, Y = trX[j:j+batch_size], trY[j:j+batch_size]
        # apply the update rule W_new = W_old - grad(L) * learning_rate to every weight matrix
        for w, g in zip(weights, grads(X, Y, weights)):
            w -= learn_rate * g
    prediction_test = np.argmax(feed_forward(teX, weights)[-1], axis=1)
    # prints our accuracy on the test data after each epoch
    print(i, np.mean(prediction_test == np.argmax(teY, axis=1)))
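
# (illustrative) after training, the network can classify individual images; this
# usage example is not part of the assignment and assumes the flattened images
# are 28x28 pixels
sample_pred = np.argmax(feed_forward(teX[:5], weights)[-1], axis=1)
sample_true = np.argmax(teY[:5], axis=1)
print('predicted:', sample_pred, 'actual:', sample_true)
# matplotlib is imported above but otherwise unused; as a sketch, visualize the
# first test image together with its prediction
plt.imshow(teX[0].reshape(28, 28), cmap='gray')
plt.title('predicted: %d, actual: %d' % (sample_pred[0], sample_true[0]))
plt.show()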