Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "LevenbergMarquardtTraining.h"
- #include "NeuralNetwork.h"
- #include "ActivationFunctions.h"
- #include "../Libraries/alglib/solvers.h"
- LevenbergMarquardtTraining::LevenbergMarquardtTraining(NeuralNetwork * net)
- {
- //efficient algorithm for training neural networks with one hidden layer
- //http://cs.olemiss.edu/~ychen/publications/conference/chen_ijcnn99.pdf
- this->net = net;
- this->oCount = this->net->GetNeuronsCount(NeuralNetwork::OUTPUT);
- this->hCount = this->net->GetNeuronsCount(NeuralNetwork::HIDDEN);
- this->N = this->net->GetWeightsCount();
- this->K = this->oCount;
- this->e.resize(this->K);
- this->res.resize(this->N);
- this->jacobi.setlength(this->K, this->N);
- this->inverse.setlength(this->K, this->K);
- this->oDelta = Matrix2D(this->oCount, this->oCount);
- this->hDelta = Matrix2D(this->oCount, this->hCount);
- }
LevenbergMarquardtTraining::~LevenbergMarquardtTraining()
{
	// Nothing to release: "net" is a non-owning pointer supplied by the
	// caller, and all other members clean up via their own destructors.
}
- void LevenbergMarquardtTraining::Train(const std::vector<TrainPair> & train, double errTresshold, int maxIterCount)
- {
- this->bestWeights.clear();
- double actError = 0;
- double lastError = 0;
- double minError = DBL_MAX;
- int iterCount = 0;
- double percent = maxIterCount / 100.0;
- std::vector<double> weightBckp;
- int m = 1;
- this->mi = 0.01;
- //Main loop
- do
- {
- for (uint32 k = 0; k < this->K; k++)
- {
- this->e[k] = 0;
- }
- //clear jacobi
- for (uint32 k = 0; k < this->K; k++)
- {
- for (uint32 n = 0; n < this->N; n++)
- {
- this->jacobi(k, n) = 0;
- }
- }
- m = 1;
- actError = 0;
- for (int i = 0; i < train.size(); i++)
- {
- this->ForwardPass(train[i]);
- //calculate deltas for one training input / output
- this->BackwardPass();
- this->CalcJacobi();
- actError += this->TrainError(train[i].output, this->net->GetOutputs());
- }
- this->net->GetWeights(weightBckp);
- lastError = actError;
- JACOBI_UPDATE:
- actError = 0;
- this->CalcInverse();
- this->CalcUpdateRate();
- this->UpdateWeights(weightBckp);
- //calculate error after update
- //to check, if update was helpfull
- for (int i = 0; i < train.size(); i++)
- {
- this->net->Run(train[i].input);
- actError += this->TrainError(train[i].output, this->net->GetOutputs());
- }
- //according to usefulness of update, do something
- if (actError <= lastError)
- {
- this->mi /= 10;
- }
- else if (actError > lastError)
- {
- if (m <= 5)
- {
- m++;
- this->mi *= 10;
- this->net->SetWeights(weightBckp);
- goto JACOBI_UPDATE;
- }
- }
- else if (actError <= errTresshold)
- {
- break;
- }
- if (actError < minError)
- {
- minError = actError;
- this->net->GetWeights(this->bestWeights);
- }
- iterCount++;
- int actPercent = iterCount / percent;
- //if (actPercent % 20 == 0)
- {
- //printf("Trained: %d, min-error: %f\n", actPercent, minError);
- printf("Act-error: %f, min-error: %f\n", actError, minError);
- }
- } while (iterCount < maxIterCount);
- printf("Error best: %f \n", minError);
- this->net->GetWeights(this->bestWeights);
- }
- void LevenbergMarquardtTraining::ForwardPass(const TrainPair & train)
- {
- this->net->Run(train.input);
- std::vector<double> output = this->net->GetOutputs();
- for (uint32 i = 0; i < this->K; i++)
- {
- this->e[i] += (train.output[i] - output[i]) * (train.output[i] - output[i]);
- }
- }
void LevenbergMarquardtTraining::BackwardPass()
{
	// Back-propagate activation derivatives for the single hidden layer:
	// fills oGrads (output layer) and hGrads (hidden layer) for the sample
	// most recently run through the network.
	this->oGrads.assign(this->K, 0.0);
	this->hGrads.assign(this->hCount, 0.0);

	// output-layer gradients: activation derivative at the pre-activation sums
	for (uint32 o = 0; o < this->K; o++)
	{
		this->oGrads[o] = this->net->hoFunc->DerivateFunction(this->net->hoSums[o]);
	}

	// hidden-layer gradients: each downstream gradient weighted by its
	// outgoing weight, times the hidden activation derivative
	for (uint32 h = 0; h < this->hCount; h++)
	{
		double downstream = 0.0;
		for (uint32 o = 0; o < this->K; o++)
		{
			downstream += this->oGrads[o] * this->net->hoWeights.m[h][o];
		}
		this->hGrads[h] = downstream * this->net->ihFunc->DerivateFunction(this->net->ihSums[h]);
	}
}
void LevenbergMarquardtTraining::CalcJacobi()
{
	// Accumulate one sample's contribution to the K x N Jacobian.
	// Column layout must match the weight ordering of
	// NeuralNetwork::GetWeights()/SetWeights():
	//   [input->hidden weights][hidden biases][hidden->output weights][output biases]
	// (presumed from the section order below -- verify against NeuralNetwork).
	//
	// NOTE(review): the row index k is never used inside the loop body, so
	// all K rows receive identical values. The commented-out history in the
	// original suggests a per-output factor (output[k]) was dropped --
	// confirm this duplication is intentional.
	//
	// Cleanup: removed the unused local copy of GetOutputs() and the dead
	// commented-out alternatives that obscured the live code.
	for (uint32 k = 0; k < this->K; k++)
	{
		uint32 n = 0;

		// input -> hidden weights
		for (uint32 i = 0; i < this->net->inputCount; i++)
		{
			for (uint32 j = 0; j < this->net->hiddenCount; j++)
			{
				this->jacobi(k, n++) += (this->net->inputs[i] * this->hGrads[j]);
			}
		}
		// hidden-layer biases
		for (uint32 i = 0; i < this->net->hiddenCount; i++)
		{
			this->jacobi(k, n++) += (this->hGrads[i]);
		}
		// hidden -> output weights
		for (uint32 i = 0; i < this->net->hiddenCount; i++)
		{
			for (uint32 j = 0; j < this->net->outputCount; j++)
			{
				this->jacobi(k, n++) += (this->net->ihOutputs[i] * this->oGrads[j]);
			}
		}
		// output-layer biases
		for (uint32 i = 0; i < this->net->outputCount; i++)
		{
			this->jacobi(k, n++) += (this->oGrads[i]);
		}
	}
}
- void LevenbergMarquardtTraining::CalcInverse()
- {
- //fill matrix with values
- //input = I + 1/mi * J * JT
- //http://www.alglib.net/translator/man/manual.cpp.html#example_ablas_d_gemm
- alglib::rmatrixgemm(this->K, this->K, this->N,
- 1.0 / this->mi,
- this->jacobi, 0, 0, 0,
- this->jacobi, 0, 0, 1,
- 0,
- this->inverse, 0, 0);
- for (uint32 k = 0; k < this->K; k++)
- {
- this->inverse(k, k) += 1.0; //add I matrix
- }
- //now compute Inverse
- //http://www.alglib.net/translator/man/manual.cpp.html#sub_rmatrixinverse
- alglib::ae_int_t info;
- alglib::matinvreport inv;
- alglib::rmatrixinverse(this->inverse, info, inv);
- }
- void LevenbergMarquardtTraining::CalcUpdateRate()
- {
- alglib::real_2d_array tmpNK;
- alglib::real_2d_array tmpNN;
- tmpNK.setlength(this->N, this->K);
- tmpNN.setlength(this->N, this->N);
- // (1/mi * I - 1/mi^2 * JT * Inverse * J) * JT * E
- //tmpNK = 1/mi^2 * JT * Inverse
- alglib::rmatrixgemm(this->N, this->K, this->K,
- 1.0 / (this->mi * this->mi),
- this->jacobi, 0, 0, 1,
- this->inverse, 0, 0, 0,
- 0,
- tmpNK, 0, 0);
- //tmpNN = tmpNK * J
- alglib::rmatrixgemm(this->N, this->N, this->K,
- 1,
- tmpNK, 0, 0, 0,
- this->jacobi, 0, 0, 0,
- 0,
- tmpNN, 0, 0);
- //tmpNN = 1 / mi * I - tmpNN
- for (uint32 i = 0; i < this->N; i++)
- {
- for (uint32 j = 0; j < this->N; j++)
- {
- tmpNN(i, j) *= -1;
- }
- }
- for (uint32 n = 0; n < this->N; n++)
- {
- tmpNN(n, n) += (1.0 / this->mi);
- }
- //tmpNK = tmpNN * JT
- alglib::rmatrixgemm(this->N, this->K, this->N,
- 1,
- tmpNN, 0, 0, 0,
- this->jacobi, 0, 0, 1,
- 0,
- tmpNK, 0, 0);
- for (uint32 n = 0; n < this->N; n++)
- {
- this->res[n] = 0;
- }
- //res = tmpNK * E
- for (uint32 n = 0; n < this->N; n++)
- {
- for (uint32 k = 0; k < this->K; k++)
- {
- this->res[n] += tmpNK(n, k) * this->e[k];
- }
- }
- }
- void LevenbergMarquardtTraining::UpdateWeights(const std::vector<double> & oldWeights)
- {
- std::vector<double> newWeights;
- newWeights.resize(this->N);
- for (uint32 n = 0; n < this->N; n++)
- {
- newWeights[n] = oldWeights[n] - this->res[n];
- }
- this->mi = 0;
- for (uint32 k = 0; k < this->K; k++)
- {
- this->mi += this->e[k] * this->e[k];
- }
- this->mi *= 0.01;
- this->net->SetWeights(newWeights);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement