#include "LevenbergMarquardtTraining.h"
#include "NeuralNetwork.h"
#include "ActivationFunctions.h"
#include "../Libraries/alglib/solvers.h"
LevenbergMarquardtTraining::LevenbergMarquardtTraining(NeuralNetwork * net)
{
//efficient algorithm for training neural networks with one hidden layer
//http://cs.olemiss.edu/~ychen/publications/conference/chen_ijcnn99.pdf
//Caches the network sizes and pre-allocates the working buffers:
//K = number of network outputs, N = total number of weights.
//net is borrowed (not owned) - presumably the caller keeps it alive; TODO confirm
this->net = net;
this->oCount = this->net->GetNeuronsCount(NeuralNetwork::OUTPUT);
this->hCount = this->net->GetNeuronsCount(NeuralNetwork::HIDDEN);
this->N = this->net->GetWeightsCount();
this->K = this->oCount;
//e: per-output accumulated error vector; res: weight-update vector (length N)
this->e.resize(this->K);
this->res.resize(this->N);
//jacobi is K x N; inverse holds the K x K matrix (I + 1/mi * J*J^T)^-1 built in CalcInverse()
this->jacobi.setlength(this->K, this->N);
this->inverse.setlength(this->K, this->K);
//NOTE(review): hDelta is oCount x hCount and oDelta oCount x oCount -
//verify these dimensions are intended (hDelta rows look like they should be per-hidden)
this->oDelta = Matrix2D(this->oCount, this->oCount);
this->hDelta = Matrix2D(this->oCount, this->hCount);
}
LevenbergMarquardtTraining::~LevenbergMarquardtTraining()
{
//nothing to release: this class never deletes the raw net pointer
//(presumably owned by the caller - TODO confirm ownership)
}
void LevenbergMarquardtTraining::Train(const std::vector<TrainPair> & train, double errTresshold, int maxIterCount)
{
this->bestWeights.clear();
double actError = 0;
double lastError = 0;
double minError = DBL_MAX;
int iterCount = 0;
double percent = maxIterCount / 100.0;
std::vector<double> weightBckp;
int m = 1;
this->mi = 0.01;
//Main loop
do
{
for (uint32 k = 0; k < this->K; k++)
{
this->e[k] = 0;
}
//clear jacobi
for (uint32 k = 0; k < this->K; k++)
{
for (uint32 n = 0; n < this->N; n++)
{
this->jacobi(k, n) = 0;
}
}
m = 1;
actError = 0;
for (int i = 0; i < train.size(); i++)
{
this->ForwardPass(train[i]);
//calculate deltas for one training input / output
this->BackwardPass();
this->CalcJacobi();
actError += this->TrainError(train[i].output, this->net->GetOutputs());
}
this->net->GetWeights(weightBckp);
lastError = actError;
JACOBI_UPDATE:
actError = 0;
this->CalcInverse();
this->CalcUpdateRate();
this->UpdateWeights(weightBckp);
//calculate error after update
//to check, if update was helpfull
for (int i = 0; i < train.size(); i++)
{
this->net->Run(train[i].input);
actError += this->TrainError(train[i].output, this->net->GetOutputs());
}
//according to usefulness of update, do something
if (actError <= lastError)
{
this->mi /= 10;
}
else if (actError > lastError)
{
if (m <= 5)
{
m++;
this->mi *= 10;
this->net->SetWeights(weightBckp);
goto JACOBI_UPDATE;
}
}
else if (actError <= errTresshold)
{
break;
}
if (actError < minError)
{
minError = actError;
this->net->GetWeights(this->bestWeights);
}
iterCount++;
int actPercent = iterCount / percent;
//if (actPercent % 20 == 0)
{
//printf("Trained: %d, min-error: %f\n", actPercent, minError);
printf("Act-error: %f, min-error: %f\n", actError, minError);
}
} while (iterCount < maxIterCount);
printf("Error best: %f \n", minError);
this->net->GetWeights(this->bestWeights);
}
void LevenbergMarquardtTraining::ForwardPass(const TrainPair & train)
{
this->net->Run(train.input);
std::vector<double> output = this->net->GetOutputs();
for (uint32 i = 0; i < this->K; i++)
{
this->e[i] += (train.output[i] - output[i]) * (train.output[i] - output[i]);
}
}
void LevenbergMarquardtTraining::BackwardPass()
{
/*
this->hSlope.clear();
this->oSlope.clear();
this->net->GetSlope(NeuralNetwork::HIDDEN, this->hSlope);
this->net->GetSlope(NeuralNetwork::OUTPUT, this->oSlope);
*/
this->oGrads.clear();
this->hGrads.clear();
this->oGrads.resize(this->K);
this->hGrads.resize(this->hCount);
// 1. compute output gradients
for (uint32 i = 0; i < this->K; i++)
{
//this->oGrads[i] = this->oSlope[i];
this->oGrads[i] = this->net->hoFunc->DerivateFunction(this->net->hoSums[i]);
}
// 2. compute hidden gradients
for (uint32 i = 0; i < this->hCount; i++)
{
double sum = 0.0;
for (uint32 j = 0; j < this->K; j++)
{
sum += this->oGrads[j] * this->net->hoWeights.m[i][j]; // each downstream gradient * outgoing weight
}
//this->hGrads[i] = sum * this->hSlope[i];
this->hGrads[i] = sum * this->net->ihFunc->DerivateFunction(this->net->ihSums[i]);
}
}
void LevenbergMarquardtTraining::CalcJacobi()
{
//Accumulate the error Jacobian for the current sample into this->jacobi.
//The running column index n must match the weight ordering used by
//net->GetWeights()/SetWeights(): input->hidden weights, hidden biases,
//hidden->output weights, output biases - TODO confirm against NeuralNetwork.
//NOTE(review): the values added below do not depend on k, so all K rows of
//the Jacobian end up identical; verify this is the intended approximation.
std::vector<double> output = this->net->GetOutputs(); //NOTE(review): unused in the live code below
//std::vector<double> weights;
//this->net->GetWeights(weights);
for (uint32 k = 0; k < this->K; k++)
{
uint32 n = 0;
//input -> hidden weights: d/dw = input_i * hidden gradient_j
for (uint32 i = 0; i < this->net->inputCount; i++)
{
for (uint32 j = 0; j < this->net->hiddenCount; j++)
{
//this->jacobi(k, n++) += (/*this->net->inputs[i] * */ output[k] * this->hGrads[j]);
//this->jacobi(k, n) += (this->net->inputs[i] * this->hGrads[j]);
this->jacobi(k, n++) += (this->net->inputs[i] * this->hGrads[j]);
}
}
//hidden biases: d/db = hidden gradient_i
for (uint32 i = 0; i < this->net->hiddenCount; i++)
{
//this->jacobi(k, n++) += (output[k] * this->hGrads[i]);
//this->jacobi(k, n) += (this->hGrads[i]);
this->jacobi(k, n++) += (this->hGrads[i]);
}
// 4. update hidden to output weights: d/dw = hidden output_i * output gradient_j
for (uint32 i = 0; i < this->net->hiddenCount; i++)
{
for (uint32 j = 0; j < this->net->outputCount; j++)
{
//this->jacobi(k, n++) += (/*this->net->ihOutputs[i] * */ output[k] * this->oGrads[j]);
//this->jacobi(k, n) += (this->net->ihOutputs[i] * this->oGrads[j]);
this->jacobi(k, n++) += (this->net->ihOutputs[i] * this->oGrads[j]);
}
}
// 4b. update hidden to output biases: d/db = output gradient_i
for (uint32 i = 0; i < this->net->outputCount; i++)
{
//this->jacobi(k, n++) += (output[k] * this->oGrads[i]);
//this->jacobi(k, n) += (this->oGrads[i]);
this->jacobi(k, n++) += (this->oGrads[i]);
}
/*
for (uint32 n = 0; n < this->N; n++)
{
double grad = 0;
this->jacobi(k, n) += (output[k] * grad);
//this->jacobi(k, n) = f->DerivateFunctionFromFx(this->e[k]);
}
*/
}
}
void LevenbergMarquardtTraining::CalcInverse()
{
//Build and invert the small K x K system used by the matrix inversion lemma:
//this->inverse = (I + 1/mi * J * J^T)^-1
//fill matrix with values
//input = I + 1/mi * J * JT
//http://www.alglib.net/translator/man/manual.cpp.html#example_ablas_d_gemm
//rmatrixgemm(M, N, K, alpha, A, ia, ja, opA, B, ib, jb, opB, beta, C, ic, jc)
//opA = 0 (no transpose), opB = 1 (transpose B): inverse = (1/mi) * J * J^T
alglib::rmatrixgemm(this->K, this->K, this->N,
1.0 / this->mi,
this->jacobi, 0, 0, 0,
this->jacobi, 0, 0, 1,
0,
this->inverse, 0, 0);
for (uint32 k = 0; k < this->K; k++)
{
this->inverse(k, k) += 1.0; //add I matrix
}
//now compute Inverse (in place)
//http://www.alglib.net/translator/man/manual.cpp.html#sub_rmatrixinverse
alglib::ae_int_t info;
alglib::matinvreport inv;
//NOTE(review): info is never checked - a singular/ill-conditioned matrix
//fails silently here; consider verifying info > 0
alglib::rmatrixinverse(this->inverse, info, inv);
}
void LevenbergMarquardtTraining::CalcUpdateRate()
{
//Compute the LM weight update this->res via the matrix inversion lemma,
//so only the K x K matrix from CalcInverse() had to be inverted:
//res = (1/mi * I - 1/mi^2 * J^T * Inverse * J) * J^T * E
//where Inverse = (I + 1/mi * J * J^T)^-1 was prepared by CalcInverse().
alglib::real_2d_array tmpNK;
alglib::real_2d_array tmpNN;
tmpNK.setlength(this->N, this->K);
tmpNN.setlength(this->N, this->N);
// (1/mi * I - 1/mi^2 * JT * Inverse * J) * JT * E
//step 1: tmpNK = 1/mi^2 * JT * Inverse   (opA = 1 transposes J)
alglib::rmatrixgemm(this->N, this->K, this->K,
1.0 / (this->mi * this->mi),
this->jacobi, 0, 0, 1,
this->inverse, 0, 0, 0,
0,
tmpNK, 0, 0);
//step 2: tmpNN = tmpNK * J
alglib::rmatrixgemm(this->N, this->N, this->K,
1,
tmpNK, 0, 0, 0,
this->jacobi, 0, 0, 0,
0,
tmpNN, 0, 0);
//step 3: tmpNN = 1/mi * I - tmpNN (negate, then add 1/mi on the diagonal)
for (uint32 i = 0; i < this->N; i++)
{
for (uint32 j = 0; j < this->N; j++)
{
tmpNN(i, j) *= -1;
}
}
for (uint32 n = 0; n < this->N; n++)
{
tmpNN(n, n) += (1.0 / this->mi);
}
//step 4: tmpNK = tmpNN * JT   (opB = 1 transposes J)
alglib::rmatrixgemm(this->N, this->K, this->N,
1,
tmpNN, 0, 0, 0,
this->jacobi, 0, 0, 1,
0,
tmpNK, 0, 0);
for (uint32 n = 0; n < this->N; n++)
{
this->res[n] = 0;
}
//step 5: res = tmpNK * E (plain matrix-vector product with the error vector)
for (uint32 n = 0; n < this->N; n++)
{
for (uint32 k = 0; k < this->K; k++)
{
this->res[n] += tmpNK(n, k) * this->e[k];
}
}
}
void LevenbergMarquardtTraining::UpdateWeights(const std::vector<double> & oldWeights)
{
std::vector<double> newWeights;
newWeights.resize(this->N);
for (uint32 n = 0; n < this->N; n++)
{
newWeights[n] = oldWeights[n] - this->res[n];
}
this->mi = 0;
for (uint32 k = 0; k < this->K; k++)
{
this->mi += this->e[k] * this->e[k];
}
this->mi *= 0.01;
this->net->SetWeights(newWeights);
}