Jesorx

SGD implementation

Apr 9th, 2020
import numpy

# The neural network class has 4 main attributes:
# self.biases -> the list of bias vectors (one column vector per non-input layer)
# self.weights -> the list of weight matrices (one per connection between consecutive layers)
# self.layers -> a list with the size of each layer ( [5, 3, 2, 1] means 4 layers: 5 neurons for the input, 3 for the first hidden, 2 for the second hidden and 1 for the output layer )
# self.activation_functions -> a list parallel to self.layers that, instead of the number of neurons per layer, holds the name of the activation function to use for that layer (example: [None, "relu", "relu", "sigmoid"])

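# For context, a minimal sketch of how such a class could be set up. The constructor
# signature and the random initialization below are assumptions, not part of the
# original paste; only the attribute names and shapes follow from the comments above.
# The stochastic_gradient_descent function below is meant to be a method of this class.
class NeuralNetwork:
    def __init__(self, layers, activation_functions):
        self.layers = layers                              # e.g. [5, 3, 2, 1]
        self.activation_functions = activation_functions  # e.g. [None, "relu", "relu", "sigmoid"]
        # One (n_out, n_in) weight matrix and one (n_out, 1) bias vector per layer transition
        self.weights = [numpy.random.randn(n_out, n_in)
                        for n_in, n_out in zip(layers[:-1], layers[1:])]
        self.biases = [numpy.random.randn(n_out, 1) for n_out in layers[1:]]
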
def stochastic_gradient_descent(self, input_data, loss_function, learning_rate=0.01, momentum=0.5, minibatch_size=100, relu_leak=0.1):
    # input_data -> list of tuples (X, y) [training sample and expected output]
    # loss_function -> a string used to choose which loss function to use
    # learning_rate -> learning rate for the parameter updates
    # momentum -> momentum coefficient for the velocity updates
    # minibatch_size -> number of samples per minibatch
    # relu_leak -> the leak value passed to the activation function when a layer uses leaky_relu

    weight_velocities = [numpy.zeros_like(weights) for weights in self.weights]
    bias_velocities = [numpy.zeros_like(biases) for biases in self.biases]

    mini_batches = [input_data[i:i + minibatch_size] for i in range(0, len(input_data), minibatch_size)]
    loss = 0
    slope = 0
    for mini_batch in mini_batches:
        temp_weights = [numpy.zeros(weights_layer.shape) for weights_layer in self.weights]
        temp_biases = [numpy.zeros(bias_layer.shape) for bias_layer in self.biases]

        minibatch_loss = 0
        minibatch_slope = 0
        # Cycle for backpropagation
        for input_sample, correct_result in mini_batch:

            # Feed forward
            # network_activated_neurons stores the neuron values of every layer after they were passed through the activation function
            network_activated_neurons = [input_sample]
            # network_not_activated_neurons stores the neuron values of every layer before they were passed through the activation function
            network_not_activated_neurons = [input_sample]
            # Skip the first entry of self.activation_functions (the input layer has no activation),
            # so each weight/bias pair is matched with the activation of the layer it produces
            for biases, weights, activation_function in zip(self.biases, self.weights, self.activation_functions[1:]):
                x = numpy.dot(weights, network_activated_neurons[-1]) + biases
                network_not_activated_neurons.append(x)
                network_activated_neurons.append(self.calculate_activation(activation_function, x, relu_leak))

            activated_output = network_activated_neurons[-1]
            not_activated_output = network_not_activated_neurons[-1]

            # Output layer errors & gradients
            error_gradients = self.calculate_loss_gradient(loss_function, activated_output, correct_result)
            # Delta of the output layer: derivative of the loss w.r.t. the pre-activation output
            final_gradients = self.calculate_activation_prime(self.activation_functions[-1], not_activated_output) * error_gradients

            minibatch_loss += self.calculate_loss(loss_function, activated_output, correct_result)
            minibatch_slope += error_gradients

            delta_biases = final_gradients
            delta_weights = numpy.dot(final_gradients, network_activated_neurons[-2].transpose())

            weight_velocities[-1] = momentum * weight_velocities[-1] + (1 - momentum) * delta_weights
            bias_velocities[-1] = momentum * bias_velocities[-1] + (1 - momentum) * delta_biases

            temp_weights[-1] += weight_velocities[-1]
            temp_biases[-1] += bias_velocities[-1]

            # Hidden layers errors & gradients
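            # layer_index counts backwards from the output layer: index -layer_index is the
            # hidden layer currently being processed, -layer_index + 1 selects the weight
            # matrix that propagates the error back from the layer closer to the output,
            # and -layer_index - 1 selects the previous layer's activations.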
            for layer_index in range(2, len(self.layers)):
                hidden_layer_errors = numpy.dot(self.weights[-layer_index + 1].transpose(), final_gradients)
                activation_derivative = self.calculate_activation_prime(self.activation_functions[-layer_index], network_not_activated_neurons[-layer_index])

                final_gradients = hidden_layer_errors * activation_derivative

                delta_biases = final_gradients
                delta_weights = numpy.dot(final_gradients, network_activated_neurons[-layer_index - 1].transpose())

                weight_velocities[-layer_index] = momentum * weight_velocities[-layer_index] + (1 - momentum) * delta_weights
                bias_velocities[-layer_index] = momentum * bias_velocities[-layer_index] + (1 - momentum) * delta_biases

                temp_weights[-layer_index] += weight_velocities[-layer_index]
                temp_biases[-layer_index] += bias_velocities[-layer_index]

        loss += minibatch_loss / len(mini_batch)
        slope += minibatch_slope / len(mini_batch)
        # Apply the accumulated velocities of this minibatch to the parameters
        self.weights = [weights - learning_rate * weight_update for weights, weight_update in zip(self.weights, temp_weights)]
        self.biases = [biases - learning_rate * bias_update for biases, bias_update in zip(self.biases, temp_biases)]
    return loss / len(mini_batches), slope / len(mini_batches)
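
# The code above relies on several helper methods that are not included in the paste:
# calculate_activation, calculate_activation_prime, calculate_loss and
# calculate_loss_gradient. Below is a minimal sketch of what string-dispatched
# versions could look like; the "mse" loss name and the exact formulas are
# assumptions, only "relu", "sigmoid" and "leaky_relu" are named in the paste.
# Like stochastic_gradient_descent, these are meant to be methods of the class.
def calculate_activation(self, name, x, relu_leak=0.1):
    if name is None:
        return x
    if name == "sigmoid":
        return 1.0 / (1.0 + numpy.exp(-x))
    if name == "relu":
        return numpy.maximum(0, x)
    if name == "leaky_relu":
        return numpy.where(x > 0, x, relu_leak * x)
    raise ValueError(f"Unknown activation: {name}")

def calculate_activation_prime(self, name, x, relu_leak=0.1):
    if name is None:
        return numpy.ones_like(x)
    if name == "sigmoid":
        s = 1.0 / (1.0 + numpy.exp(-x))
        return s * (1 - s)
    if name == "relu":
        return numpy.where(x > 0, 1.0, 0.0)
    if name == "leaky_relu":
        return numpy.where(x > 0, 1.0, relu_leak)
    raise ValueError(f"Unknown activation: {name}")

def calculate_loss(self, name, output, target):
    if name == "mse":
        return 0.5 * numpy.sum((output - target) ** 2)
    raise ValueError(f"Unknown loss: {name}")

def calculate_loss_gradient(self, name, output, target):
    if name == "mse":
        return output - target
    raise ValueError(f"Unknown loss: {name}")

# A hypothetical training call (illustrative only, assuming the functions above are
# attached as methods of the NeuralNetwork class and samples are column vectors):
# net = NeuralNetwork([5, 3, 2, 1], [None, "relu", "relu", "sigmoid"])
# data = [(numpy.random.randn(5, 1), numpy.random.randn(1, 1)) for _ in range(1000)]
# for epoch in range(30):
#     loss, slope = net.stochastic_gradient_descent(data, "mse", learning_rate=0.01)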