Untitled

/// <summary>
///   Calculates the partial derivatives of a single network output
///   with respect to every weight and threshold of the network.
/// </summary>
///
/// <remarks>
///   The results are written into the <c>weightDerivatives</c> and
///   <c>thresholdsDerivatives</c> buffers. The method reads the cached
///   <c>Output</c> values of the layers and neurons and does not compute them
///   itself; presumably the caller has already run a forward pass for
///   <paramref name="input"/> (confirm against the caller).
/// </remarks>
///
/// <param name="input">The input vector.</param>
/// <param name="desiredOutput">Desired output vector.</param>
/// <param name="outputIndex">The current output location (index) in the desired output vector.</param>
///
/// <returns>
///   The signed error of the selected output neuron, i.e.
///   <c>desiredOutput[outputIndex]</c> minus the neuron's current output.
/// </returns>
///
private double CalculateDerivatives(double[] input, double[] desiredOutput, int outputIndex)
{
    // assume, that all neurons of the network have the same activation function
    IActivationFunction function = network[0][0].ActivationFunction;

    double[] previousLayerOutput;

    // Start by the output layer first
    int outputLayerIndex = network.LayersCount - 1;
    ActivationLayer outputLayer = network[outputLayerIndex];

    // If we have only one single layer, the previous layer outputs is given by the input layer
    previousLayerOutput = (outputLayerIndex == 0) ? input : network[outputLayerIndex - 1].Output;

    // Only the output neuron selected by outputIndex contributes to the derivatives
    // computed by this call. Clear whatever a previous call (possibly for another
    // output neuron) left in the output layer buffers; otherwise the hidden layer
    // pass below, which sums over ALL next-layer derivatives, would mix in stale
    // values belonging to other outputs.
    double[] outputThresholdDerivatives = thresholdsDerivatives[outputLayerIndex];
    for (int i = 0; i < outputThresholdDerivatives.Length; i++)
    {
        outputThresholdDerivatives[i] = 0;
    }

    foreach (double[] outputNeuronDerivatives in weightDerivatives[outputLayerIndex])
    {
        for (int i = 0; i < outputNeuronDerivatives.Length; i++)
        {
            outputNeuronDerivatives[i] = 0;
        }
    }

    // Retrieve the output neuron currently being differentiated
    ActivationNeuron outputNeuron = outputLayer[outputIndex];
    double[] neuronWeightDerivatives = weightDerivatives[outputLayerIndex][outputIndex];

    double output = outputNeuron.Output;
    double e = desiredOutput[outputIndex] - output;

    // Derivative2 is evaluated at the neuron's output value
    double derivative = function.Derivative2(output);

    // Set derivative for each weight in the neuron
    for (int i = 0; i < previousLayerOutput.Length; i++)
    {
        neuronWeightDerivatives[i] = derivative * previousLayerOutput[i];
    }

    // Set derivative for the current threshold (bias) term
    outputThresholdDerivatives[outputIndex] = derivative;


    // Now, proceed to the hidden layers
    for (int layerIndex = network.LayersCount - 2; layerIndex >= 0; layerIndex--)
    {
        int nextLayerIndex = layerIndex + 1;

        ActivationLayer layer = network[layerIndex];
        ActivationLayer nextLayer = network[nextLayerIndex];

        // If we are in the first layer, the previous layer is just the input layer
        previousLayerOutput = (layerIndex == 0) ? input : network[layerIndex - 1].Output;

        // The threshold derivatives also gather the derivatives for
        // the layer, and thus can be re-used in next calculations.
        double[] layerDerivatives = thresholdsDerivatives[layerIndex];
        double[] nextLayerDerivatives = thresholdsDerivatives[nextLayerIndex];

        // Now, we will compute the derivatives for the current layer applying the chain
        //  rule. To apply the chain-rule, we will make use of the previous derivatives
        //  computed for the layers closer to the output (forming a calculation chain,
        //  hence the name).

        // So, for each neuron in the current layer:
        for (int neuronIndex = 0; neuronIndex < layer.NeuronsCount; neuronIndex++)
        {
            ActivationNeuron neuron = layer[neuronIndex];

            neuronWeightDerivatives = weightDerivatives[layerIndex][neuronIndex];

            double sum = 0;

            // The chain-rule can be stated as (f(w*g(x))' = f'(w*g(x)) * w*g'(x)
            //
            // We will start computing the second part of the product. Since the
            //  next layer derivatives have already been computed in the previous
            //  iteration, we sum them, each weighted by the connection weight
            //  (synapse) that links this neuron to the next layer neuron.
            //
            // So, for each neuron in the next layer:
            for (int j = 0; j < nextLayerDerivatives.Length; j++)
            {
                // retrieve the weight connecting the output of the current
                //   neuron and the activation function of the next neuron.
                double weight = nextLayer[j][neuronIndex];

                // accumulate the synapse weight * next layer derivative
                sum += weight * nextLayerDerivatives[j];
            }

            // Continue forming the chain-rule statement
            derivative = sum * function.Derivative2(neuron.Output);

            // Set derivative for each weight in the neuron
            for (int i = 0; i < previousLayerOutput.Length; i++)
            {
                neuronWeightDerivatives[i] = derivative * previousLayerOutput[i];
            }

            // Set derivative for the current threshold
            layerDerivatives[neuronIndex] = derivative;
        }
    }

    // return the signed error of the selected output neuron
    return e;
}