
Levenberg-Marquardt problem

a guest
Mar 4th, 2013
#include "LevenbergMarquardtTraining.h"

#include "NeuralNetwork.h"
#include "ActivationFunctions.h"
#include "../Libraries/alglib/solvers.h"

#include <cfloat>  //DBL_MAX
#include <cstdio>  //printf
#include <vector>
LevenbergMarquardtTraining::LevenbergMarquardtTraining(NeuralNetwork * net)
{
    //efficient algorithm for training neural networks with one hidden layer
    //http://cs.olemiss.edu/~ychen/publications/conference/chen_ijcnn99.pdf
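
    //The classic Levenberg-Marquardt step is
    //    dw = (J^T * J + mi * I)^(-1) * J^T * E
    //where J is the K x N Jacobian of the network outputs with respect to
    //the N weights, E is the error vector and mi is the damping term.
    //Here the step is evaluated through the matrix inversion lemma
    //(see CalcInverse / CalcUpdateRate), so only a K x K matrix has to be
    //inverted instead of an N x N one.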

    this->net = net;

    this->oCount = this->net->GetNeuronsCount(NeuralNetwork::OUTPUT);
    this->hCount = this->net->GetNeuronsCount(NeuralNetwork::HIDDEN);

    this->N = this->net->GetWeightsCount();
    this->K = this->oCount;

    this->e.resize(this->K);
    this->res.resize(this->N);

    this->jacobi.setlength(this->K, this->N);
    this->inverse.setlength(this->K, this->K);

    this->oDelta = Matrix2D(this->oCount, this->oCount);
    this->hDelta = Matrix2D(this->oCount, this->hCount);
}

LevenbergMarquardtTraining::~LevenbergMarquardtTraining()
{
}

void LevenbergMarquardtTraining::Train(const std::vector<TrainPair> & train, double errThreshold, int maxIterCount)
{
    this->bestWeights.clear();

    double actError = 0;
    double lastError = 0;
    double minError = DBL_MAX;

    int iterCount = 0;
    double percent = maxIterCount / 100.0;

    std::vector<double> weightBckp;
    int m = 1;
    this->mi = 0.01;

    //Main loop
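    //Each iteration: run all training samples to accumulate the error
    //vector e and the Jacobian, compute the weight update, and adapt the
    //damping term mi - decrease it after a successful step, increase it
    //and retry (up to 5 times) after an unsuccessful one.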
    do
    {
        for (uint32 k = 0; k < this->K; k++)
        {
            this->e[k] = 0;
        }

        //clear jacobi
        for (uint32 k = 0; k < this->K; k++)
        {
            for (uint32 n = 0; n < this->N; n++)
            {
                this->jacobi(k, n) = 0;
            }
        }

        m = 1;
        actError = 0;

        for (size_t i = 0; i < train.size(); i++)
        {
            this->ForwardPass(train[i]);

            //calculate deltas for one training input / output
            this->BackwardPass();

            this->CalcJacobi();

            actError += this->TrainError(train[i].output, this->net->GetOutputs());
        }

        this->net->GetWeights(weightBckp);

        lastError = actError;

    JACOBI_UPDATE:
        actError = 0;
        this->CalcInverse();
        this->CalcUpdateRate();

        this->UpdateWeights(weightBckp);

        //calculate error after update
        //to check if the update was helpful
        for (size_t i = 0; i < train.size(); i++)
        {
            this->net->Run(train[i].input);
            actError += this->TrainError(train[i].output, this->net->GetOutputs());
        }

        //adapt the damping term according to the usefulness of the update
        if (actError <= lastError)
        {
            //successful step - trust the model more
            this->mi /= 10;
        }
        else if (m <= 5)
        {
            //unsuccessful step - restore the old weights, increase the
            //damping and retry with the same Jacobian (at most 5 times)
            m++;
            this->mi *= 10;
            this->net->SetWeights(weightBckp);
            goto JACOBI_UPDATE;
        }

        //stop once the error falls below the threshold
        if (actError <= errThreshold)
        {
            break;
        }

        if (actError < minError)
        {
            minError = actError;
            this->net->GetWeights(this->bestWeights);
        }

        iterCount++;

        int actPercent = static_cast<int>(iterCount / percent);

        //if (actPercent % 20 == 0)
        {
            //printf("Trained: %d, min-error: %f\n", actPercent, minError);
            printf("Act-error: %f, min-error: %f\n", actError, minError);
        }

    } while (iterCount < maxIterCount);

    printf("Error best: %f \n", minError);

    //restore the best weights seen during training
    this->net->SetWeights(this->bestWeights);
}

void LevenbergMarquardtTraining::ForwardPass(const TrainPair & train)
{
    this->net->Run(train.input);

    std::vector<double> output = this->net->GetOutputs();

    for (uint32 i = 0; i < this->K; i++)
    {
        //accumulate the squared residual of output i over the batch
        //(note: textbook Levenberg-Marquardt uses the raw residual in E)
        this->e[i] += (train.output[i] - output[i]) * (train.output[i] - output[i]);
    }
}

void LevenbergMarquardtTraining::BackwardPass()
{
    /*
    this->hSlope.clear();
    this->oSlope.clear();

    this->net->GetSlope(NeuralNetwork::HIDDEN, this->hSlope);
    this->net->GetSlope(NeuralNetwork::OUTPUT, this->oSlope);
    */

    this->oGrads.clear();
    this->hGrads.clear();
    this->oGrads.resize(this->K);
    this->hGrads.resize(this->hCount);

    // 1. compute output gradients
    for (uint32 i = 0; i < this->K; i++)
    {
        //this->oGrads[i] = this->oSlope[i];
        this->oGrads[i] = this->net->hoFunc->DerivateFunction(this->net->hoSums[i]);
    }

    // 2. compute hidden gradients
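    //Unlike plain backpropagation, these values are activation-function
    //derivatives (slopes) evaluated at the neuron input sums, with no
    //residual term; CalcJacobi() multiplies them by the corresponding
    //layer inputs to form the entries of the Jacobian d(output)/d(weight).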
    for (uint32 i = 0; i < this->hCount; i++)
    {
        double sum = 0.0;

        for (uint32 j = 0; j < this->K; j++)
        {
            sum += this->oGrads[j] * this->net->hoWeights.m[i][j]; //each downstream gradient * outgoing weight
        }
        //this->hGrads[i] = sum * this->hSlope[i];
        this->hGrads[i] = sum * this->net->ihFunc->DerivateFunction(this->net->ihSums[i]);
    }
}

void LevenbergMarquardtTraining::CalcJacobi()
{
    std::vector<double> output = this->net->GetOutputs();

    //std::vector<double> weights;
    //this->net->GetWeights(weights);

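    //Note: the += below accumulates contributions from every training
    //sample into a single K x N Jacobian for the whole batch (standard
    //Levenberg-Marquardt builds one row per sample and output instead).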
    for (uint32 k = 0; k < this->K; k++)
    {
        uint32 n = 0;

        // 1. input to hidden weights
        for (uint32 i = 0; i < this->net->inputCount; i++)
        {
            for (uint32 j = 0; j < this->net->hiddenCount; j++)
            {
                //this->jacobi(k, n++) += (/*this->net->inputs[i] * */ output[k] * this->hGrads[j]);
                //this->jacobi(k, n) += (this->net->inputs[i] * this->hGrads[j]);
                this->jacobi(k, n++) += (this->net->inputs[i] * this->hGrads[j]);
            }
        }

        // 2. hidden biases
        for (uint32 i = 0; i < this->net->hiddenCount; i++)
        {
            //this->jacobi(k, n++) += (output[k] * this->hGrads[i]);
            //this->jacobi(k, n) += (this->hGrads[i]);
            this->jacobi(k, n++) += (this->hGrads[i]);
        }

        // 3. hidden to output weights
        for (uint32 i = 0; i < this->net->hiddenCount; i++)
        {
            for (uint32 j = 0; j < this->net->outputCount; j++)
            {
                //this->jacobi(k, n++) += (/*this->net->ihOutputs[i] * */ output[k] * this->oGrads[j]);
                //this->jacobi(k, n) += (this->net->ihOutputs[i] * this->oGrads[j]);
                this->jacobi(k, n++) += (this->net->ihOutputs[i] * this->oGrads[j]);
            }
        }

        // 4. output biases
        for (uint32 i = 0; i < this->net->outputCount; i++)
        {
            //this->jacobi(k, n++) += (output[k] * this->oGrads[i]);
            //this->jacobi(k, n) += (this->oGrads[i]);
            this->jacobi(k, n++) += (this->oGrads[i]);
        }

        /*
        for (uint32 n = 0; n < this->N; n++)
        {
            double grad = 0;

            this->jacobi(k, n) += (output[k] * grad);
            //this->jacobi(k, n) = f->DerivateFunctionFromFx(this->e[k]);
        }
        */
    }
}

void LevenbergMarquardtTraining::CalcInverse()
{
    //fill matrix with values
    //inverse = I + 1/mi * J * J^T

    //http://www.alglib.net/translator/man/manual.cpp.html#example_ablas_d_gemm
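
    //Inverting this K x K matrix is what lets the (J^T * J + mi * I)^(-1)
    //term of the Levenberg-Marquardt step be evaluated via the matrix
    //inversion lemma in CalcUpdateRate(); K (the output count) is usually
    //much smaller than N (the weight count).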

    alglib::rmatrixgemm(this->K, this->K, this->N,
        1.0 / this->mi,
        this->jacobi, 0, 0, 0,
        this->jacobi, 0, 0, 1,
        0,
        this->inverse, 0, 0);

    for (uint32 k = 0; k < this->K; k++)
    {
        this->inverse(k, k) += 1.0; //add I matrix
    }

    //now compute the inverse in place
    //http://www.alglib.net/translator/man/manual.cpp.html#sub_rmatrixinverse
    alglib::ae_int_t info;
    alglib::matinvreport inv;
    alglib::rmatrixinverse(this->inverse, info, inv);
}

void LevenbergMarquardtTraining::CalcUpdateRate()
{
    alglib::real_2d_array tmpNK;
    alglib::real_2d_array tmpNN;
    tmpNK.setlength(this->N, this->K);
    tmpNN.setlength(this->N, this->N);

    //dw = (1/mi * I - 1/mi^2 * J^T * Inverse * J) * J^T * E
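    //This is the matrix inversion lemma (Woodbury identity) applied to the
    //standard Levenberg-Marquardt term:
    //  (J^T * J + mi * I)^(-1) = 1/mi * I - 1/mi^2 * J^T * (I + 1/mi * J * J^T)^(-1) * J
    //where "Inverse" = (I + 1/mi * J * J^T)^(-1) was computed in CalcInverse().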

    //tmpNK = 1/mi^2 * J^T * Inverse
    alglib::rmatrixgemm(this->N, this->K, this->K,
        1.0 / (this->mi * this->mi),
        this->jacobi, 0, 0, 1,
        this->inverse, 0, 0, 0,
        0,
        tmpNK, 0, 0);

    //tmpNN = tmpNK * J
    alglib::rmatrixgemm(this->N, this->N, this->K,
        1,
        tmpNK, 0, 0, 0,
        this->jacobi, 0, 0, 0,
        0,
        tmpNN, 0, 0);

    //tmpNN = 1/mi * I - tmpNN
    for (uint32 i = 0; i < this->N; i++)
    {
        for (uint32 j = 0; j < this->N; j++)
        {
            tmpNN(i, j) *= -1;
        }
    }

    for (uint32 n = 0; n < this->N; n++)
    {
        tmpNN(n, n) += (1.0 / this->mi);
    }

    //tmpNK = tmpNN * J^T
    alglib::rmatrixgemm(this->N, this->K, this->N,
        1,
        tmpNN, 0, 0, 0,
        this->jacobi, 0, 0, 1,
        0,
        tmpNK, 0, 0);

    for (uint32 n = 0; n < this->N; n++)
    {
        this->res[n] = 0;
    }

    //res = tmpNK * E
    for (uint32 n = 0; n < this->N; n++)
    {
        for (uint32 k = 0; k < this->K; k++)
        {
            this->res[n] += tmpNK(n, k) * this->e[k];
        }
    }
}

void LevenbergMarquardtTraining::UpdateWeights(const std::vector<double> & oldWeights)
{
    std::vector<double> newWeights;
    newWeights.resize(this->N);

    for (uint32 n = 0; n < this->N; n++)
    {
        newWeights[n] = oldWeights[n] - this->res[n];
    }

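    //Re-seed the damping term from the current squared error:
    //mi = 0.01 * sum(e[k]^2). Note that because this runs on every update
    //attempt while e stays fixed within an iteration, the mi *= 10
    //escalation in Train() always acts on the same re-seeded base value,
    //so repeated retries effectively reuse an identical mi.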
    this->mi = 0;
    for (uint32 k = 0; k < this->K; k++)
    {
        this->mi += this->e[k] * this->e[k];
    }
    this->mi *= 0.01;

    this->net->SetWeights(newWeights);
}
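
For context, a minimal sketch of how this trainer might be driven. The NeuralNetwork(inputCount, hiddenCount, outputCount) constructor and the TrainPair aggregate layout are assumptions, since the paste does not include those headers; only the Train() signature is taken from the code above.

#include "LevenbergMarquardtTraining.h"
#include "NeuralNetwork.h"

#include <vector>

int main()
{
    //hypothetical constructor signature: (input, hidden, output neuron counts)
    NeuralNetwork net(2, 4, 1);

    //TrainPair is assumed to be an aggregate of input/output vectors,
    //matching the train[i].input / train[i].output accesses in Train()
    std::vector<TrainPair> train;
    train.push_back({ { 0.0, 0.0 }, { 0.0 } });
    train.push_back({ { 0.0, 1.0 }, { 1.0 } });
    train.push_back({ { 1.0, 0.0 }, { 1.0 } });
    train.push_back({ { 1.0, 1.0 }, { 0.0 } });

    LevenbergMarquardtTraining trainer(&net);
    trainer.Train(train, 1e-4, 1000); //error threshold, max iterations
    return 0;
}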