Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import math
import sys
import time
from math import *

import numpy as np
from matplotlib.pyplot import *
from scipy import optimize
def idebug(*args):
    """Echo puzzle input to stderr so it shows up in the judge's debug pane."""
    print(*args, file=sys.stderr, flush=True)
def debug(*args):
    """Write general diagnostic messages to stderr (kept separate from idebug
    so input echoing and other debugging can be silenced independently)."""
    print(*args, file=sys.stderr, flush=True)
- """Neural Networks Demystified [Part 6: Training]"""
class Trainer(object):
    """Drive BFGS optimization of a network's weights via scipy.optimize.

    The wrapped object must expose getParams/setParams (flat parameter
    vector), costFunction(X, y) and computeGradients(X, y).
    """

    def __init__(self, N):
        # Keep a local reference to the network being trained.
        self.N = N

    def costFunctionWrapper(self, params, X, y):
        """Return (cost, gradient) at `params` — the form scipy expects
        when called with jac=True."""
        self.N.setParams(params)
        return self.N.costFunction(X, y), self.N.computeGradients(X, y)

    def callbackF(self, params):
        # Invoked once per optimizer iteration: record the current cost.
        self.N.setParams(params)
        self.J.append(self.N.costFunction(self.X, self.y))

    def train(self, X, y, max_iterations):
        """Fit the network on (X, y) with at most `max_iterations` BFGS steps.

        Side effects: stores the cost history in self.J, the full scipy
        result in self.optimizationResults, and leaves the network holding
        the optimized weights.
        """
        # Stash the training data where the callback can see it.
        self.X = X
        self.y = y
        self.J = []  # cost after each iteration, filled by callbackF

        initial_guess = self.N.getParams()
        result = optimize.minimize(
            self.costFunctionWrapper,
            initial_guess,
            jac=True,                 # costFunctionWrapper returns (cost, grad)
            method='BFGS',
            args=(X, y),
            options={'maxiter': max_iterations, 'disp': True},
            callback=self.callbackF,
        )
        self.N.setParams(result.x)
        self.optimizationResults = result
class Neural_Network(object):
    """Feed-forward network with one hidden layer and sigmoid activations.

    Weights W1 (input->hidden) and W2 (hidden->output) can be flattened to,
    and restored from, a single parameter vector so the network plugs into
    scipy.optimize (see getParams/setParams/computeGradients).
    """

    def __init__(self, inputs, outputs, hidden_layers):
        # Hyperparameters: layer sizes.
        self.inputLayerSize = inputs
        self.outputLayerSize = outputs
        self.hiddenLayerSize = hidden_layers
        # Weights (parameters), randomly initialized.
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, X):
        """Propagate inputs through the network; return predictions yHat.

        Caches z2/a2/z3 on self for use by costFunctionPrime.
        """
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat

    def sigmoid(self, z):
        """Apply the sigmoid activation element-wise (scalar, vector, or matrix)."""
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        """Derivative of the sigmoid function.

        Computed as s * (1 - s) with s = sigmoid(z): mathematically identical
        to exp(-z) / (1 + exp(-z))**2 but does not overflow for large |z|.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def costFunction(self, X, y):
        """Return the scalar squared-error cost 0.5 * sum((y - yHat)**2).

        Uses weights already stored in the class. Bug fix: the original used
        the builtin sum(), which sums a 2-D residual only along axis 0 and
        returns an array — scipy.optimize.minimize requires a scalar cost.
        """
        self.yHat = self.forward(X)
        return 0.5 * float(np.sum((y - self.yHat) ** 2))

    def costFunctionPrime(self, X, y):
        """Return (dJdW1, dJdW2): analytic gradients of the cost w.r.t. W1, W2."""
        self.yHat = self.forward(X)
        # Backpropagate: output-layer error, then hidden-layer error.
        delta3 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)
        delta2 = np.dot(delta3, self.W2.T) * self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)
        return dJdW1, dJdW2

    # --- Helper functions for interacting with other methods/classes ---

    def getParams(self):
        """Get W1 and W2 rolled into a single flat vector."""
        return np.concatenate((self.W1.ravel(), self.W2.ravel()))

    def setParams(self, params):
        """Set W1 and W2 from a single flat parameter vector (getParams order)."""
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end],
                             (self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize * self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end],
                             (self.hiddenLayerSize, self.outputLayerSize))

    def computeGradients(self, X, y):
        """Return the analytic gradient as one flat vector (matches getParams)."""
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

    def computeNumericalGradient(self, N, X, y):
        """Central-difference numerical gradient of N's cost, for gradient checking.

        NOTE(review): takes the network N as an explicit argument rather than
        operating on self — kept as-is to preserve the original interface.
        """
        paramsInitial = N.getParams()
        numgrad = np.zeros(paramsInitial.shape)
        perturb = np.zeros(paramsInitial.shape)
        e = 1e-4
        for p in range(len(paramsInitial)):
            # Perturb one parameter at a time by +/- e.
            perturb[p] = e
            N.setParams(paramsInitial + perturb)
            loss2 = N.costFunction(X, y)
            N.setParams(paramsInitial - perturb)
            loss1 = N.costFunction(X, y)
            # Central difference approximation of the partial derivative.
            numgrad[p] = (loss2 - loss1) / (2 * e)
            # Reset the perturbation entry to zero for the next parameter.
            perturb[p] = 0
        # Restore the network's original parameters.
        N.setParams(paramsInitial)
        return numgrad
# Auto-generated code below aims at helping you parse
# the standard input according to the problem statement.

# Header line: layer sizes, test-set size, training-set size, iteration budget.
line = input()
idebug(line)
inputs, outputs, hidden_layers, test_inputs, training_examples, training_iterations = [
    int(i) for i in line.split()
]
tic = time.time()  # requires `import time` at the top of the file

# Second line: node counts per layer. Parsed to consume the input line;
# not otherwise used by this solution.
line = input()
idebug(line)
nodes = [int(i) for i in line.split()]

# Test inputs: one digit-string per line, split into one int per character.
test_rows = []
for _ in range(test_inputs):
    test_input = input()
    idebug(test_input)
    test_rows.append(list(test_input))
X_test = np.array(test_rows, dtype=int)

# Training examples: "<input digits> <expected output digits>" per line.
training_inputs_list = []
training_outputs_list = []
for _ in range(training_examples):
    line = input()
    idebug(line)
    training_inputs, expected_outputs = line.split()
    training_inputs_list.append(list(training_inputs))
    training_outputs_list.append(list(expected_outputs))
X = np.array(training_inputs_list, dtype=int)
y = np.array(training_outputs_list, dtype=int)

# Build and train the network, then chart the cost history.
NN = Neural_Network(inputs, outputs, hidden_layers)
T = Trainer(NN)
T.train(X, y, training_iterations)
plot(T.J)
grid(1)
ylabel('Cost')
xlabel('Iterations')

# Predict on the test set: one prediction row printed per test input.
yHat = NN.forward(X_test)
for i in range(test_inputs):
    print(yHat[i])
debug(f'elapsed time = {round((time.time() - tic) * 1000, 2)} ms')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement