import numpy

# The neural network class has 4 main attributes:
# self.biases -> the list of bias vectors, one per non-input layer
# self.weights -> the list of weight matrices, one per non-input layer
# self.layers -> a list describing the shape of the network ([5, 3, 2, 1] means 4 layers: 5 input neurons, 3 in the first hidden layer, 2 in the second hidden layer and 1 output neuron)
# self.activation_functions -> a list of strings parallel to self.layers, but instead of the number of neurons it names the activation function used by each layer (example: [None, "relu", "relu", "sigmoid"])
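# A minimal sketch of how these attributes might be initialized. This __init__ is
# an assumption, not part of the original paste; it uses the column-vector
# convention the method below relies on, with Gaussian initialization:
def __init__(self, layers, activation_functions):
    self.layers = layers                              # e.g. [5, 3, 2, 1]
    self.activation_functions = activation_functions  # e.g. [None, "relu", "relu", "sigmoid"]
    # One (n, 1) bias vector per non-input layer
    self.biases = [numpy.random.randn(n, 1) for n in layers[1:]]
    # One (n, m) weight matrix mapping a layer of m neurons to the next layer of n neurons
    self.weights = [numpy.random.randn(n, m) for m, n in zip(layers[:-1], layers[1:])]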
def stochastic_gradient_descent(self, input_data, loss_function, learning_rate=0.01, momentum=0.5, minibatch_size=100, relu_leak=0.1):
    # input_data -> list of tuples (X, y): a training sample and its expected output
    # loss_function -> a string selecting which loss function to use
    # learning_rate -> learning rate
    # momentum -> momentum coefficient
    # minibatch_size -> minibatch size
    # relu_leak -> the leak value forwarded to the activation when a layer uses leaky_relu
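    # Returns: (average loss, average loss gradient) over all minibatches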
    # Velocity buffers for momentum, one per weight matrix / bias vector
    weight_velocities = [numpy.zeros_like(weights) for weights in self.weights]
    bias_velocities = [numpy.zeros_like(biases) for biases in self.biases]
    # Split the training data into consecutive minibatches
    mini_batches = [input_data[i:i + minibatch_size] for i in range(0, len(input_data), minibatch_size)]
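    # e.g. with len(input_data) == 250 and minibatch_size == 100 this yields
    # three minibatches of sizes 100, 100 and 50 (the last one may be short)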
    loss = 0
    slope = 0
    for mini_batch in mini_batches:
        # Per-minibatch accumulators for the velocity-smoothed gradients
        temp_weights = [numpy.zeros(weights_layer.shape) for weights_layer in self.weights]
        temp_biases = [numpy.zeros(bias_layer.shape) for bias_layer in self.biases]
        minibatch_loss = 0
        minibatch_slope = 0
        # Cycle for backpropagation
        for input_sample, correct_result in mini_batch:
            # Feed forward
            # network_activated_neurons holds the neuron values after they were passed through the activation function
            network_activated_neurons = [input_sample]
            # network_not_activated_neurons holds the neuron values before they were passed through the activation function
            network_not_activated_neurons = [input_sample]
            # Skip the input layer's None entry so each weight layer is paired with its own activation
            for biases, weights, activation_function in zip(self.biases, self.weights, self.activation_functions[1:]):
                x = numpy.dot(weights, network_activated_neurons[-1]) + biases
                network_not_activated_neurons.append(x)
                network_activated_neurons.append(self.calculate_activation(activation_function, x, relu_leak))
            activated_output = network_activated_neurons[-1]
            not_activated_output = network_not_activated_neurons[-1]
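            # network_activated_neurons now holds one column vector per layer,
            # e.g. shapes (5, 1), (3, 1), (2, 1), (1, 1) for layers [5, 3, 2, 1]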
            # Output layer errors & gradients
            error_gradients = self.calculate_loss_gradient(loss_function, activated_output, correct_result)
            # Chain rule: dC/dz = dC/da * da/dz -> the full derivative of the cost w.r.t. the output pre-activations
            final_gradients = self.calculate_activation_prime(self.activation_functions[-1], not_activated_output) * error_gradients
            minibatch_loss += self.calculate_loss(loss_function, activated_output, correct_result)
            minibatch_slope += error_gradients
            delta_biases = final_gradients
            delta_weights = numpy.dot(final_gradients, network_activated_neurons[-2].transpose())
            # Exponential moving average of the gradients: v = momentum * v + (1 - momentum) * g
            weight_velocities[-1] = momentum * weight_velocities[-1] + (1 - momentum) * delta_weights
            bias_velocities[-1] = momentum * bias_velocities[-1] + (1 - momentum) * delta_biases
            temp_weights[-1] += weight_velocities[-1]
            temp_biases[-1] += bias_velocities[-1]
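            # e.g. with momentum = 0.5 each velocity is the mean of its previous
            # value and the newest gradient; temp_weights / temp_biases accumulate
            # these velocities over the whole minibatch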
            # Hidden layers errors & gradients, walking backwards from the last hidden layer
            for layer_index in range(2, len(self.layers)):
                # Propagate the error backwards through the following layer's weights
                hidden_layer_errors = numpy.dot(self.weights[-layer_index + 1].transpose(), final_gradients)
                activation_derivative = self.calculate_activation_prime(self.activation_functions[-layer_index], network_not_activated_neurons[-layer_index])
                final_gradients = hidden_layer_errors * activation_derivative
                delta_biases = final_gradients
                delta_weights = numpy.dot(final_gradients, network_activated_neurons[-layer_index - 1].transpose())
                weight_velocities[-layer_index] = momentum * weight_velocities[-layer_index] + (1 - momentum) * delta_weights
                bias_velocities[-layer_index] = momentum * bias_velocities[-layer_index] + (1 - momentum) * delta_biases
                temp_weights[-layer_index] += weight_velocities[-layer_index]
                temp_biases[-layer_index] += bias_velocities[-layer_index]
        loss += minibatch_loss / len(mini_batch)
        slope += minibatch_slope / len(mini_batch)
        # Apply the updates accumulated over this minibatch
        self.weights = [weights - delta_weights * learning_rate for weights, delta_weights in zip(self.weights, temp_weights)]
        self.biases = [biases - delta_biases * learning_rate for biases, delta_biases in zip(self.biases, temp_biases)]
    return loss / len(mini_batches), slope / len(mini_batches)
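# A minimal usage sketch. The class name "NeuralNetwork" and the loss string "mse"
# are assumptions for illustration, not confirmed by this paste:
#
#   net = NeuralNetwork([5, 3, 2, 1], [None, "relu", "relu", "sigmoid"])
#   data = [(numpy.random.randn(5, 1), numpy.random.rand(1, 1)) for _ in range(1000)]
#   for epoch in range(30):
#       loss, slope = net.stochastic_gradient_descent(data, "mse")
#       print(epoch, loss)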