YourMain12
GithubNeuralNetwork Beta
Jan 3rd, 2024 (edited)
C#
using System;
using System.Linq;

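// This paste references a companion Matrix type and a MatrixFunctions helper that are
// not included. From the call sites below, the expected surface is roughly:
//
//   Matrix          : RowCount, ColumnCount, Transpose(),
//                     MapDerivative(Func<Matrix, Matrix> activation),
//                     static Zeros(rows, cols), static Ones(rows, cols),
//                     static RandomMatrix(rows, cols, Random rand),
//                     operators +, -, *, / (element-wise for same-shaped operands,
//                     matrix product for conformable shapes, broadcast for scalars)
//   MatrixFunctions : ReLU(Matrix), Sqrt(Matrix), Mean(Matrix, axis), Variance(Matrix, axis)
//
// These signatures are inferred from usage in this file, not shown in the paste.
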
public class GithubNeuralNetwork
{
    private int[] _layers;
    private Matrix[] _weights;
    private Matrix[] _biases;
    private Func<Matrix, Matrix>[] _activationFunctions;
    private double _learningRate;
    private double _epsilon;
    private Matrix[] _gamma;            // layer-norm scale, per layer
    private Matrix[] _beta;             // layer-norm shift, per layer
    private double _initialLearningRate;
    private double _decayRate;
    private string _optimizer;          // "Adam", "AdaGrad", or anything else for plain SGD
    private Matrix[] _movingMeans;      // running layer-norm statistics
    private Matrix[] _movingVariances;
    private Matrix[] _mWeights;         // first/second moment accumulators for Adam/AdaGrad
    private Matrix[] _vWeights;
    private Matrix[] _mBiases;
    private Matrix[] _vBiases;
    private Matrix[] _mGamma;
    private Matrix[] _vGamma;
    private Matrix[] _mBeta;
    private Matrix[] _vBeta;
    private int _t;                     // Adam time step

    public GithubNeuralNetwork(double learningRate, double epsilon, string optimizer, double decayRate, params int[] layers)
    {
        _layers = layers;
        int layerCount = layers.Length - 1; // number of weight layers

        _weights = new Matrix[layerCount];
        _biases = new Matrix[layerCount];
        _activationFunctions = new Func<Matrix, Matrix>[layerCount];
        _learningRate = learningRate;
        _epsilon = epsilon;
        _gamma = new Matrix[layerCount];
        _beta = new Matrix[layerCount];
        _initialLearningRate = learningRate;
        _decayRate = decayRate;
        _optimizer = optimizer;
        _movingMeans = new Matrix[layerCount];
        _movingVariances = new Matrix[layerCount];
        _mWeights = new Matrix[layerCount];
        _vWeights = new Matrix[layerCount];
        _mBiases = new Matrix[layerCount];
        _vBiases = new Matrix[layerCount];
        _mGamma = new Matrix[layerCount];
        _vGamma = new Matrix[layerCount];
        _mBeta = new Matrix[layerCount];
        _vBeta = new Matrix[layerCount];
        _t = 1;

        InitializeWeightsAndBiases();
        SetActivationFunctions();
    }

    private void InitializeWeightsAndBiases()
    {
        Random rand = new Random();
        for (int i = 0; i < _weights.Length; i++)
        {
            // Weights are (fan-out x fan-in); biases, gamma, and beta are column vectors.
            _weights[i] = XavierInitialization(_layers[i + 1], _layers[i], rand);
            _biases[i] = Matrix.Zeros(_layers[i + 1], 1);
            _gamma[i] = Matrix.Ones(_layers[i + 1], 1);
            _beta[i] = Matrix.Zeros(_layers[i + 1], 1);

            // Running layer-norm statistics start at mean 0, variance 1.
            _movingMeans[i] = Matrix.Zeros(_layers[i + 1], 1);
            _movingVariances[i] = Matrix.Ones(_layers[i + 1], 1);

            // Optimizer moment accumulators start at zero, matching each parameter's shape.
            _mWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _vWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _mBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _vBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _mGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _vGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _mBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
            _vBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
        }
    }

    private void SetActivationFunctions()
    {
        for (int i = 0; i < _activationFunctions.Length; i++)
        {
            _activationFunctions[i] = MatrixFunctions.ReLU; // ReLU is the default activation for every layer
        }
    }

    public void SetActivationFunction(int layerIndex, Func<Matrix, Matrix> activationFunction)
    {
        if (layerIndex < 0 || layerIndex >= _activationFunctions.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(layerIndex));
        }
        _activationFunctions[layerIndex] = activationFunction;
    }

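    // Example: a probability output usually wants something other than ReLU on the last
    // layer. Assuming MatrixFunctions also exposes a Sigmoid (this paste does not show
    // one), the output layer could be overridden like this:
    //
    //     var net = new GithubNeuralNetwork(0.001, 1e-5, "Adam", 0.01, 2, 8, 1);
    //     net.SetActivationFunction(1, MatrixFunctions.Sigmoid); // last layer index = layers.Length - 2
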
    private Matrix XavierInitialization(int rows, int cols, Random rand)
    {
        // Glorot/Xavier scaling: variance proportional to 2 / (fan-in + fan-out).
        // Assumes Matrix.RandomMatrix draws unit-scale values (e.g. standard normal).
        double scale = Math.Sqrt(2.0 / (rows + cols));
        return Matrix.RandomMatrix(rows, cols, rand) * scale;
    }

    private Matrix LayerNormalization(Matrix x, Matrix gamma, Matrix beta, int layerIndex)
    {
        // Standard layer norm: y = gamma * (x - mean) / sqrt(variance + eps) + beta,
        // with gamma and beta applied element-wise (this relies on the Matrix '*'
        // operator being element-wise for same-shaped operands).
        Matrix mean = MatrixFunctions.Mean(x, axis: 1);
        Matrix variance = MatrixFunctions.Variance(x, axis: 1);

        // Exponential moving averages of the statistics (momentum 0.9), kept for inference.
        _movingMeans[layerIndex] = (_movingMeans[layerIndex] * 0.9) + (mean * 0.1);
        _movingVariances[layerIndex] = (_movingVariances[layerIndex] * 0.9) + (variance * 0.1);

        Matrix normalized = (x - mean) / MatrixFunctions.Sqrt(variance + _epsilon);
        return (gamma * normalized) + beta;
    }

    private void Optimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases, Matrix[] gradientsGamma, Matrix[] gradientsBeta)
    {
        double beta1 = 0.9;    // Adam first-moment decay
        double beta2 = 0.999;  // Adam second-moment decay
        double epsilon = 1e-8; // numerical-stability constant (distinct from the layer-norm _epsilon field)

        for (int i = 0; i < _weights.Length; i++)
        {
            if (_optimizer == "Adam")
            {
                // Update biased first- and second-moment estimates for every parameter group.
                _mWeights[i] = (beta1 * _mWeights[i]) + ((1 - beta1) * gradientsWeights[i]);
                _vWeights[i] = (beta2 * _vWeights[i]) + ((1 - beta2) * (gradientsWeights[i] * gradientsWeights[i]));

                _mBiases[i] = (beta1 * _mBiases[i]) + ((1 - beta1) * gradientsBiases[i]);
                _vBiases[i] = (beta2 * _vBiases[i]) + ((1 - beta2) * (gradientsBiases[i] * gradientsBiases[i]));

                _mGamma[i] = (beta1 * _mGamma[i]) + ((1 - beta1) * gradientsGamma[i]);
                _vGamma[i] = (beta2 * _vGamma[i]) + ((1 - beta2) * (gradientsGamma[i] * gradientsGamma[i]));

                _mBeta[i] = (beta1 * _mBeta[i]) + ((1 - beta1) * gradientsBeta[i]);
                _vBeta[i] = (beta2 * _vBeta[i]) + ((1 - beta2) * (gradientsBeta[i] * gradientsBeta[i]));

                // Bias-correct the moments before applying the update.
                Matrix mHatWeights = _mWeights[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatWeights = _vWeights[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatBiases = _mBiases[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBiases = _vBiases[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatGamma = _mGamma[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatGamma = _vGamma[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatBeta = _mBeta[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBeta = _vBeta[i] / (1 - Math.Pow(beta2, _t));

                _weights[i] -= (_learningRate * mHatWeights) / (MatrixFunctions.Sqrt(vHatWeights) + epsilon);
                _biases[i] -= (_learningRate * mHatBiases) / (MatrixFunctions.Sqrt(vHatBiases) + epsilon);
                _gamma[i] -= (_learningRate * mHatGamma) / (MatrixFunctions.Sqrt(vHatGamma) + epsilon);
                _beta[i] -= (_learningRate * mHatBeta) / (MatrixFunctions.Sqrt(vHatBeta) + epsilon);
            }
            else if (_optimizer == "AdaGrad")
            {
                // AdaGrad accumulates squared gradients and scales each step by their root.
                _vWeights[i] += gradientsWeights[i] * gradientsWeights[i];
                _vBiases[i] += gradientsBiases[i] * gradientsBiases[i];
                _vGamma[i] += gradientsGamma[i] * gradientsGamma[i];
                _vBeta[i] += gradientsBeta[i] * gradientsBeta[i];

                _weights[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vWeights[i]) + epsilon)) * gradientsWeights[i];
                _biases[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBiases[i]) + epsilon)) * gradientsBiases[i];
                _gamma[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vGamma[i]) + epsilon)) * gradientsGamma[i];
                _beta[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBeta[i]) + epsilon)) * gradientsBeta[i];
            }
            else
            {
                // Plain SGD fallback.
                _weights[i] -= _learningRate * gradientsWeights[i];
                _biases[i] -= _learningRate * gradientsBiases[i];
                _gamma[i] -= _learningRate * gradientsGamma[i];
                _beta[i] -= _learningRate * gradientsBeta[i];
            }
        }

        if (_optimizer == "Adam")
        {
            _t++; // advance the Adam time step once per update, not once per layer
        }
    }

    private Matrix FeedForward(Matrix input, bool training)
    {
        Matrix outputs = input; // column vector of activations

        for (int i = 0; i < _weights.Length; i++)
        {
            // Weights are (fan-out x fan-in), so they multiply the activations from the left.
            outputs = _weights[i] * outputs + _biases[i];

            // Layer normalization is only applied (and its running statistics updated)
            // during training; inference uses the raw affine output. A fuller
            // implementation would normalize with the stored moving statistics here.
            if (training)
            {
                outputs = LayerNormalization(outputs, _gamma[i], _beta[i], i);
            }
            outputs = _activationFunctions[i](outputs);
        }
        return outputs;
    }

    private void Backpropagation(Matrix input, Matrix target)
    {
        // Forward pass, caching each layer's activations.
        Matrix[] outputs = new Matrix[_weights.Length + 1];
        outputs[0] = input;

        for (int i = 0; i < _weights.Length; i++)
        {
            outputs[i + 1] = _weights[i] * outputs[i] + _biases[i];
            outputs[i + 1] = _activationFunctions[i](outputs[i + 1]);
        }

        // Output error for a squared-error loss: dL/dy = y - target.
        Matrix[] errors = new Matrix[_weights.Length];
        errors[_weights.Length - 1] = outputs[^1] - target;

        // Propagate the error backwards; MapDerivative is assumed to apply the
        // derivative of the given activation element-wise.
        for (int i = _weights.Length - 2; i >= 0; i--)
        {
            errors[i] = (_weights[i + 1].Transpose() * errors[i + 1]).MapDerivative(_activationFunctions[i]);
        }

        Matrix[] gradientsWeights = new Matrix[_weights.Length];
        Matrix[] gradientsBiases = new Matrix[_weights.Length];
        Matrix[] gradientsGamma = new Matrix[_weights.Length];
        Matrix[] gradientsBeta = new Matrix[_weights.Length];

        for (int i = 0; i < _weights.Length; i++)
        {
            gradientsWeights[i] = errors[i] * outputs[i].Transpose(); // outer product: error x input activations
            gradientsBiases[i] = errors[i];
            // Note: these gamma/beta "gradients" are a rough approximation carried over
            // from the original paste (and rely on element-wise '*'); exact layer-norm
            // gradients would use the cached normalized activations instead.
            gradientsGamma[i] = errors[i] * _movingMeans[i];
            gradientsBeta[i] = errors[i] * _movingVariances[i];
        }

        Optimizer(gradientsWeights, gradientsBiases, gradientsGamma, gradientsBeta);
    }

    private void LearningRateScheduler(int epoch)
    {
        // Inverse time decay: lr = lr0 / (1 + decayRate * epoch).
        _learningRate = _initialLearningRate / (1 + _decayRate * epoch);
    }

    public void Train(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize)
    {
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();

                // Bound the inner loop by the actual batch length so a short final
                // batch doesn't index past the end of the arrays.
                for (int j = 0; j < batchInputs.Length; j++)
                {
                    // The training-mode forward pass updates the layer-norm running
                    // statistics; its return value is not needed here.
                    FeedForward(batchInputs[j], true);
                    Backpropagation(batchInputs[j], batchTargets[j]);
                }
            }

            LearningRateScheduler(epoch);
        }
    }

    public Matrix Predict(Matrix input)
    {
        return FeedForward(input, false);
    }
}

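// Example usage, as a minimal sketch. Matrix.FromArray is a hypothetical constructor
// (not shown in this paste); substitute whatever the companion Matrix type provides
// for building a column vector from values.
//
//     var net = new GithubNeuralNetwork(
//         learningRate: 0.001, epsilon: 1e-5, optimizer: "Adam", decayRate: 0.01,
//         2, 8, 1); // 2 inputs, one hidden layer of 8 units, 1 output
//
//     Matrix[] inputs  = { Matrix.FromArray(new double[] { 0, 1 }) }; // 2x1 input
//     Matrix[] targets = { Matrix.FromArray(new double[] { 1 }) };    // 1x1 target
//
//     net.Train(inputs, targets, epochs: 100, batchSize: 1);
//     Matrix prediction = net.Predict(inputs[0]);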