GithubNeuralNetworkDevBuild - 04012024-147
YourMain12, Jan 4th, 2024
C#

using System;
using System.Collections.Generic;
using System.Linq;

public enum ActivationFunction
{
    ReLU,
    Sigmoid,
    Tanh,
    LeakyReLU,
    Swish,
    Mish,
    GELU,
    GeM,   // referenced by the later SetActivationFunctions revision
    AELU,  // referenced by the later SetActivationFunctions revision
    SELU   // referenced by ApplyActivationFunction near the end of this build
}

public enum Regularizer
{
    None,
    L1,
    L2
}

// Declared partial so the later dev-build fragments can legally extend the
// same class.
public partial class GithubNeuralNetwork
{
    private int[] _layers;
    private Matrix[] _weights;
    private Matrix[] _biases;
    private Func<Matrix, Matrix>[] _activationFunctions;
    private double _learningRate;
    private double _epsilon;
    private Matrix[] _gamma;
    private Matrix[] _beta;
    private double _initialLearningRate;
    private double _decayRate;
    private string _optimizer;
    private Matrix[] _movingMeans;
    private Matrix[] _movingVariances;
    private Matrix[] _mWeights;
    private Matrix[] _vWeights;
    private Matrix[] _mBiases;
    private Matrix[] _vBiases;
    private Matrix[] _mGamma;
    private Matrix[] _vGamma;
    private Matrix[] _mBeta;
    private Matrix[] _vBeta;
    private Matrix[] _slowWeights;
    private Matrix[] _slowBiases;
    private double _lookaheadAlpha;
    private double _lookaheadBeta;
    private int _t;
    private double _dropoutRate;
    private Matrix[] _dropoutMasks;
    private ActivationFunction[] _activationOptions;
    private Regularizer _regularizer;
    private double _lambda;
    private double _dropblockKeepProb;
    private int _dropblockSize;
    private double _maxLearningRate;
    private double _baseLearningRate;
    private int _stepSize;
    private int _cycle;
    private int _iterations;

    // Fields used by the RAdam fragment near the end of this build; they were
    // referenced there but never declared.
    private Matrix[] _sWeights;
    private Matrix[] _rWeights;
    private Matrix[] _sBiases;
    private Matrix[] _rBiases;
    private double _beta1 = 0.9;
    private double _beta2 = 0.999;

    public GithubNeuralNetwork(double learningRate, double epsilon, string optimizer, double decayRate,
        double dropoutRate, Regularizer regularizer, double lambda, int[] layers,
        double lookaheadAlpha = 0.5, double lookaheadBeta = 0.9,
        bool useGroupNormalization = false, int numGroups = 1,
        double dropblockKeepProb = 1.0, int dropblockSize = 0,
        double maxLearningRate = 0.01, double baseLearningRate = 0.001, int stepSize = 5)
    {
        // The original signature put `params int[] layers` in the middle of the
        // parameter list and read several values (dropblockKeepProb, stepSize,
        // etc.) that were never declared; the defaults above are editorial, and
        // the unused epochs/batchSize parameters were dropped.
        _layers = layers;
        _weights = new Matrix[layers.Length - 1];
        _biases = new Matrix[layers.Length - 1];
        _activationFunctions = new Func<Matrix, Matrix>[layers.Length - 1];
        _learningRate = learningRate;
        _epsilon = epsilon;
        _gamma = new Matrix[layers.Length - 1];
        _beta = new Matrix[layers.Length - 1];
        _initialLearningRate = learningRate;
        _decayRate = decayRate;
        _optimizer = optimizer;
        _movingMeans = new Matrix[layers.Length - 1];
        _movingVariances = new Matrix[layers.Length - 1];
        _mWeights = new Matrix[layers.Length - 1];
        _vWeights = new Matrix[layers.Length - 1];
        _mBiases = new Matrix[layers.Length - 1];
        _vBiases = new Matrix[layers.Length - 1];
        _mGamma = new Matrix[layers.Length - 1];
        _vGamma = new Matrix[layers.Length - 1];
        _mBeta = new Matrix[layers.Length - 1];
        _vBeta = new Matrix[layers.Length - 1];
        _sWeights = new Matrix[layers.Length - 1];
        _rWeights = new Matrix[layers.Length - 1];
        _sBiases = new Matrix[layers.Length - 1];
        _rBiases = new Matrix[layers.Length - 1];
        _slowWeights = new Matrix[_weights.Length];
        _slowBiases = new Matrix[_biases.Length];
        _lookaheadAlpha = lookaheadAlpha;
        _lookaheadBeta = lookaheadBeta;
        _t = 1;
        _dropoutRate = dropoutRate;
        _dropoutMasks = new Matrix[layers.Length - 1];
        _activationOptions = new ActivationFunction[layers.Length - 1];
        _regularizer = regularizer;
        _lambda = lambda;
        _dropblockKeepProb = dropblockKeepProb;
        _dropblockSize = dropblockSize;
        _maxLearningRate = maxLearningRate;
        _baseLearningRate = baseLearningRate;
        _stepSize = stepSize;
        _cycle = 0;
        _iterations = 0;

        InitializeWeightsAndBiases();
        SetActivationFunctions();
        InitializeSlowWeightsAndBiases();
        InitializeRAdamParameters();
        InitializeGroupNormalization(useGroupNormalization, numGroups);
    }

    private void InitializeSlowWeightsAndBiases()
    {
        for (int i = 0; i < _weights.Length; i++)
        {
            _slowWeights[i] = _weights[i].Copy();
            _slowBiases[i] = _biases[i].Copy();
        }
    }

    private Matrix ResidualBlock(Matrix input, int layerIndex)
    {
        Matrix residual = input;
        Matrix outputs = input;

        int numLayersInBlock = 2;

        for (int i = 0; i < numLayersInBlock; i++)
        {
            Matrix layerOutput = outputs * _weights[layerIndex] + _biases[layerIndex];
            layerOutput = layerOutput.Map(_activationFunctions[layerIndex]);

            outputs = layerOutput;
        }

        if (outputs.RowCount == residual.RowCount && outputs.ColumnCount == residual.ColumnCount)
        {
            outputs += residual; // Adding the shortcut (residual) to the output
        }

        return outputs;
    }

    private void LookaheadOptimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases)
    {
        for (int i = 0; i < _weights.Length; i++)
        {
            _slowWeights[i] = (_lookaheadAlpha * _slowWeights[i]) + ((1 - _lookaheadAlpha) * _weights[i]);
            _slowBiases[i] = (_lookaheadAlpha * _slowBiases[i]) + ((1 - _lookaheadAlpha) * _biases[i]);

            _weights[i] -= _learningRate * (_lookaheadBeta * gradientsWeights[i] + (1 - _lookaheadBeta) * _slowWeights[i]);
            _biases[i] -= _learningRate * (_lookaheadBeta * gradientsBiases[i] + (1 - _lookaheadBeta) * _slowBiases[i]);
        }
    }

    // The original paste had a truncated `Optimizer(...)` stub here with no
    // body; the full implementation appears further down in this build.

    private void InitializeWeightsAndBiases()
    {
        Random rand = new Random();
        for (int i = 0; i < _weights.Length; i++)
        {
            _weights[i] = XavierInitialization(_layers[i + 1], _layers[i], rand);
            _biases[i] = Matrix.Zeros(_layers[i + 1], 1);
            _gamma[i] = Matrix.Ones(_layers[i + 1], 1);
            _beta[i] = Matrix.Zeros(_layers[i + 1], 1);

            _movingMeans[i] = Matrix.Zeros(_layers[i + 1], 1);
            _movingVariances[i] = Matrix.Ones(_layers[i + 1], 1);

            _mWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _vWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _mBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _vBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _mGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _vGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _mBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
            _vBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
        }
    }

    private Matrix Swish(Matrix x)
    {
        return x * MatrixFunctions.Sigmoid(x);
    }

    private Matrix Mish(Matrix x)
    {
        return x * MatrixFunctions.Tanh(MatrixFunctions.Softplus(x));
    }

    private Matrix GELU(Matrix x)
    {
        // Tanh approximation of GELU, applied elementwise. The original mixed
        // scalar Math.* calls with whole matrices, which does not type-check.
        return x.Map(v => 0.5 * v * (1 + Math.Tanh(Math.Sqrt(2.0 / Math.PI) * (v + 0.044715 * Math.Pow(v, 3)))));
    }

    // Early revision: picks a random activation tag per layer but never fills
    // _activationFunctions. Renamed; the fuller revision later in this build
    // keeps the SetActivationFunctions name.
    private void SetActivationFunctionsV0()
    {
        Random rand = new Random();
        for (int i = 0; i < _activationOptions.Length; i++)
        {
            int choice = rand.Next(7); // Randomly choose an activation function
            _activationOptions[i] = (ActivationFunction)choice;
        }
    }

    private Matrix XavierInitialization(int rows, int cols, Random rand)
    {
        double scale = Math.Sqrt(2.0 / (rows + cols));
        return Matrix.RandomMatrix(rows, cols, rand) * scale;
    }

    private Matrix LayerNormalization(Matrix x, Matrix gamma, Matrix beta, int layerIndex)
    {
        Matrix mean = MatrixFunctions.Mean(x, axis: 1);
        Matrix variance = MatrixFunctions.Variance(x, axis: 1);

        _movingMeans[layerIndex] = (_movingMeans[layerIndex] * 0.9) + (mean * 0.1);
        _movingVariances[layerIndex] = (_movingVariances[layerIndex] * 0.9) + (variance * 0.1);

        Matrix normalized = (x - mean) / MatrixFunctions.Sqrt(variance + _epsilon);
        return (gamma * normalized) + beta;
    }

    private Matrix FeedForward(Matrix input, bool training)
    {
        Matrix outputs = input;

        for (int i = 0; i < _weights.Length; i++)
        {
            if (training && _dropoutRate > 0.0)
            {
                _dropoutMasks[i] = Matrix.RandomMatrix(outputs.RowCount, outputs.ColumnCount);
                _dropoutMasks[i] = _dropoutMasks[i].Map(x => x < _dropoutRate ? 0 : 1);
                outputs = outputs.PointwiseMultiply(_dropoutMasks[i]);
                outputs *= 1.0 / (1.0 - _dropoutRate); // Scale the remaining neurons
            }

            outputs = outputs * _weights[i] + _biases[i];

            switch (_activationOptions[i])
            {
                case ActivationFunction.ReLU:
                    outputs = outputs.Map(MatrixFunctions.ReLU);
                    break;
                case ActivationFunction.Sigmoid:
                    outputs = outputs.Map(MatrixFunctions.Sigmoid);
                    break;
                case ActivationFunction.Tanh:
                    outputs = outputs.Map(MatrixFunctions.Tanh);
                    break;
                case ActivationFunction.LeakyReLU:
                    outputs = outputs.Map(MatrixFunctions.LeakyReLU);
                    break;
                default:
                    outputs = outputs.Map(MatrixFunctions.ReLU);
                    break;
            }
        }
        return outputs;
    }

    private void Backpropagation(Matrix input, Matrix target)
    {
        Matrix[] outputs = new Matrix[_weights.Length + 1];
        outputs[0] = input;

        for (int i = 0; i < _weights.Length; i++)
        {
            outputs[i + 1] = outputs[i] * _weights[i] + _biases[i];
            outputs[i + 1] = outputs[i + 1].Map(_activationFunctions[i]);
        }

        Matrix[] errors = new Matrix[_weights.Length];
        errors[_weights.Length - 1] = outputs[^1] - target;

        for (int i = _weights.Length - 2; i >= 0; i--)
        {
            // MapDerivative is an assumed helper that applies the derivative of
            // the given activation function elementwise.
            errors[i] = (_weights[i + 1].Transpose() * errors[i + 1]).MapDerivative(_activationFunctions[i]);
        }

        Matrix[] gradientsWeights = new Matrix[_weights.Length];
        Matrix[] gradientsBiases = new Matrix[_weights.Length];
        Matrix[] gradientsGamma = new Matrix[_weights.Length];
        Matrix[] gradientsBeta = new Matrix[_weights.Length];

        for (int i = 0; i < _weights.Length; i++)
        {
            gradientsWeights[i] = errors[i] * outputs[i].Transpose();
            gradientsBiases[i] = errors[i];
            gradientsGamma[i] = errors[i] * _movingMeans[i];
            gradientsBeta[i] = errors[i] * _movingVariances[i];
        }

        Optimizer(gradientsWeights, gradientsBiases, gradientsGamma, gradientsBeta);

        // Regularization
        if (_regularizer != Regularizer.None)
        {
            for (int i = 0; i < _weights.Length; i++)
            {
                if (_regularizer == Regularizer.L1)
                {
                    _weights[i] -= _lambda * MatrixFunctions.Sign(_weights[i]);
                }
                else if (_regularizer == Regularizer.L2)
                {
                    _weights[i] -= _lambda * _weights[i];
                }
            }
        }
    }

    public void Train(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize)
    {
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();

                // Iterate over the actual batch length so a short final batch
                // does not index past the end.
                for (int j = 0; j < batchInputs.Length; j++)
                {
                    FeedForward(batchInputs[j], true);
                    Backpropagation(batchInputs[j], batchTargets[j]);
                }
            }

            LearningRateScheduler(epoch);
        }
    }

    public Matrix Predict(Matrix input)
    {
        return FeedForward(input, false);
    }

    private void LearningRateScheduler(int epoch)
    {
        _learningRate = _initialLearningRate / (1 + _decayRate * epoch);
    }

    private void Optimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases, Matrix[] gradientsGamma, Matrix[] gradientsBeta)
    {
        double beta1 = 0.9;    // Adam's hyperparameter (momentum decay)
        double beta2 = 0.999;  // Adam's hyperparameter (RMSprop decay)
        double epsilon = 1e-8; // Small constant to prevent division by zero

        // The original interleaved a training loop, an AdaBound step-size
        // computation, and an unconditional duplicate of the Adam update here;
        // those belong to AdvancedOptimizer below and were removed from this
        // method. One time step per call:
        _t++;

        if (_optimizer == "Lookahead")
        {
            // LookaheadOptimizer already loops over every layer itself, so it
            // must not be called once per layer as the original did.
            LookaheadOptimizer(gradientsWeights, gradientsBiases);
            return;
        }

        for (int i = 0; i < _weights.Length; i++)
        {
            if (_optimizer == "Adam")
            {
                _mWeights[i] = (beta1 * _mWeights[i]) + ((1 - beta1) * gradientsWeights[i]);
                _vWeights[i] = (beta2 * _vWeights[i]) + ((1 - beta2) * (gradientsWeights[i] * gradientsWeights[i]));

                _mBiases[i] = (beta1 * _mBiases[i]) + ((1 - beta1) * gradientsBiases[i]);
                _vBiases[i] = (beta2 * _vBiases[i]) + ((1 - beta2) * (gradientsBiases[i] * gradientsBiases[i]));

                _mGamma[i] = (beta1 * _mGamma[i]) + ((1 - beta1) * gradientsGamma[i]);
                _vGamma[i] = (beta2 * _vGamma[i]) + ((1 - beta2) * (gradientsGamma[i] * gradientsGamma[i]));

                _mBeta[i] = (beta1 * _mBeta[i]) + ((1 - beta1) * gradientsBeta[i]);
                _vBeta[i] = (beta2 * _vBeta[i]) + ((1 - beta2) * (gradientsBeta[i] * gradientsBeta[i]));

                // Bias-corrected first and second moments.
                Matrix mHatWeights = _mWeights[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatWeights = _vWeights[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatBiases = _mBiases[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBiases = _vBiases[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatGamma = _mGamma[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatGamma = _vGamma[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatBeta = _mBeta[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBeta = _vBeta[i] / (1 - Math.Pow(beta2, _t));

                _weights[i] -= (_learningRate * mHatWeights) / (MatrixFunctions.Sqrt(vHatWeights) + epsilon);
                _biases[i] -= (_learningRate * mHatBiases) / (MatrixFunctions.Sqrt(vHatBiases) + epsilon);
                _gamma[i] -= (_learningRate * mHatGamma) / (MatrixFunctions.Sqrt(vHatGamma) + epsilon);
                _beta[i] -= (_learningRate * mHatBeta) / (MatrixFunctions.Sqrt(vHatBeta) + epsilon);
            }
            else if (_optimizer == "AdaGrad")
            {
                _vWeights[i] += gradientsWeights[i] * gradientsWeights[i];
                _vBiases[i] += gradientsBiases[i] * gradientsBiases[i];
                _vGamma[i] += gradientsGamma[i] * gradientsGamma[i];
                _vBeta[i] += gradientsBeta[i] * gradientsBeta[i];

                _weights[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vWeights[i]) + epsilon)) * gradientsWeights[i];
                _biases[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBiases[i]) + epsilon)) * gradientsBiases[i];
                _gamma[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vGamma[i]) + epsilon)) * gradientsGamma[i];
                _beta[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBeta[i]) + epsilon)) * gradientsBeta[i];
            }
            else if (_optimizer == "RMSProp")
            {
                _vWeights[i] = (beta1 * _vWeights[i]) + ((1 - beta1) * (gradientsWeights[i] * gradientsWeights[i]));
                _vBiases[i] = (beta1 * _vBiases[i]) + ((1 - beta1) * (gradientsBiases[i] * gradientsBiases[i]));
                _vGamma[i] = (beta1 * _vGamma[i]) + ((1 - beta1) * (gradientsGamma[i] * gradientsGamma[i]));
                _vBeta[i] = (beta1 * _vBeta[i]) + ((1 - beta1) * (gradientsBeta[i] * gradientsBeta[i]));

                _weights[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vWeights[i]) + epsilon)) * gradientsWeights[i];
                _biases[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBiases[i]) + epsilon)) * gradientsBiases[i];
                _gamma[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vGamma[i]) + epsilon)) * gradientsGamma[i];
                _beta[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBeta[i]) + epsilon)) * gradientsBeta[i];
            }
            else // plain SGD fallback
            {
                _weights[i] -= _learningRate * gradientsWeights[i];
                _biases[i] -= _learningRate * gradientsBiases[i];
                _gamma[i] -= _learningRate * gradientsGamma[i];
                _beta[i] -= _learningRate * gradientsBeta[i];
            }
        }
    }
}

// DevBuild 0.01
// The members below were pasted after the class body in the original; they
// are grouped into a partial class continuation here.
public partial class GithubNeuralNetwork
{
    private double AdaBound(double lr, double final_lr, double beta1, double beta2, double epsilon, int t)
    {
        double step_size = lr * Math.Sqrt(1 - Math.Pow(beta2, t)) / (1 - Math.Pow(beta1, t));
        double lower_bound = final_lr * (1.0 - 1.0 / (t + 1));
        double upper_bound = final_lr * (1.0 + 1.0 / (t + 1));
        return Math.Max(lower_bound, Math.Min(upper_bound, step_size));
    }
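
    // Behavior sketch (editorial note, numbers illustrative): at t = 1 the
    // clamp window is [0.5, 1.5] * final_lr, and both bounds tighten toward
    // final_lr as t grows, so the step size drifts from an Adam-like value
    // early in training toward a fixed SGD-like rate later on.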

    private void AdvancedOptimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases, Matrix[] gradientsGamma, Matrix[] gradientsBeta, int epochs, Matrix[] inputs, Matrix[] targets, int batchSize)
    {
        double final_lr = 0.1;  // Final learning rate for AdaBound
        double beta1 = 0.9;     // Adam's hyperparameter (momentum decay)
        double beta2 = 0.999;   // Adam's hyperparameter (RMSprop decay)
        double epsilon = 1e-8;  // Small constant to prevent division by zero
        int t = 1;

        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();
                t++;

                double schedule = 0.004 * (1 - Math.Pow(0.999, t)) / (1 - Math.Pow(0.9, t));

                for (int j = 0; j < batchInputs.Length; j++)
                {
                    FeedForward(batchInputs[j], true);
                    Backpropagation(batchInputs[j], batchTargets[j]);

                    for (int k = 0; k < _weights.Length; k++)
                    {
                        _mWeights[k] = (beta1 * _mWeights[k]) + ((1 - beta1) * gradientsWeights[k]);
                        _vWeights[k] = (beta2 * _vWeights[k]) + ((1 - beta2) * (gradientsWeights[k] * gradientsWeights[k]));

                        _mBiases[k] = (beta1 * _mBiases[k]) + ((1 - beta1) * gradientsBiases[k]);
                        _vBiases[k] = (beta2 * _vBiases[k]) + ((1 - beta2) * (gradientsBiases[k] * gradientsBiases[k]));

                        _mGamma[k] = (beta1 * _mGamma[k]) + ((1 - beta1) * gradientsGamma[k]);
                        _vGamma[k] = (beta2 * _vGamma[k]) + ((1 - beta2) * (gradientsGamma[k] * gradientsGamma[k]));

                        _mBeta[k] = (beta1 * _mBeta[k]) + ((1 - beta1) * gradientsBeta[k]);
                        _vBeta[k] = (beta2 * _vBeta[k]) + ((1 - beta2) * (gradientsBeta[k] * gradientsBeta[k]));

                        Matrix mHatWeights = _mWeights[k] / (1 - Math.Pow(beta1, t));
                        Matrix vHatWeights = _vWeights[k] / (1 - Math.Pow(beta2, t));

                        Matrix mHatBiases = _mBiases[k] / (1 - Math.Pow(beta1, t));
                        Matrix vHatBiases = _vBiases[k] / (1 - Math.Pow(beta2, t));

                        Matrix mHatGamma = _mGamma[k] / (1 - Math.Pow(beta1, t));
                        Matrix vHatGamma = _vGamma[k] / (1 - Math.Pow(beta2, t));

                        Matrix mHatBeta = _mBeta[k] / (1 - Math.Pow(beta1, t));
                        Matrix vHatBeta = _vBeta[k] / (1 - Math.Pow(beta2, t));

                        double step_size = AdaBound(_learningRate, final_lr, beta1, beta2, epsilon, t);

                        _weights[k] -= (step_size * schedule * mHatWeights) / (MatrixFunctions.Sqrt(vHatWeights) + epsilon);
                        _biases[k] -= (step_size * schedule * mHatBiases) / (MatrixFunctions.Sqrt(vHatBiases) + epsilon);
                        _gamma[k] -= (step_size * schedule * mHatGamma) / (MatrixFunctions.Sqrt(vHatGamma) + epsilon);
                        _beta[k] -= (step_size * schedule * mHatBeta) / (MatrixFunctions.Sqrt(vHatBeta) + epsilon);
                    }
                }
            }
        }
    }

    private Matrix GeM(Matrix x)
    {
        // MatrixFunctions.GeM (generalized-mean pooling) is assumed to exist
        // in the helper library; it is not defined anywhere in this paste.
        return MatrixFunctions.GeM(x);
    }

    // (Verbatim duplicates of Swish, Mish, and GELU were removed here; the
    // definitions near the top of the first fragment are authoritative.)

    private Matrix AELU(Matrix x)
    {
        // ELU-style curve with a small alpha on the negative side.
        double alpha = 0.01;
        return x.Map(val => val > 0 ? val : alpha * (Math.Exp(val) - 1));
    }

    // Fuller revision: supersedes SetActivationFunctionsV0 and fills both the
    // enum tags and the function table.
    private void SetActivationFunctions()
    {
        Random rand = new Random();
        for (int i = 0; i < _activationOptions.Length; i++)
        {
            int choice = rand.Next(10); // Range widened to accommodate new functions
            switch (choice)
            {
                case 0:
                    _activationOptions[i] = ActivationFunction.ReLU;
                    _activationFunctions[i] = MatrixFunctions.ReLU;
                    break;
                case 1:
                    _activationOptions[i] = ActivationFunction.Sigmoid;
                    _activationFunctions[i] = MatrixFunctions.Sigmoid;
                    break;
                case 2:
                    _activationOptions[i] = ActivationFunction.Tanh;
                    _activationFunctions[i] = MatrixFunctions.Tanh;
                    break;
                case 3:
                    _activationOptions[i] = ActivationFunction.LeakyReLU;
                    _activationFunctions[i] = MatrixFunctions.LeakyReLU;
                    break;
                case 4:
                    _activationOptions[i] = ActivationFunction.Swish;
                    _activationFunctions[i] = Swish;
                    break;
                case 5:
                    _activationOptions[i] = ActivationFunction.Mish;
                    _activationFunctions[i] = Mish;
                    break;
                case 6:
                    _activationOptions[i] = ActivationFunction.GELU;
                    _activationFunctions[i] = GELU;
                    break;
                case 7:
                    _activationOptions[i] = ActivationFunction.GeM;
                    _activationFunctions[i] = GeM;
                    break;
                case 8:
                    _activationOptions[i] = ActivationFunction.AELU;
                    _activationFunctions[i] = AELU;
                    break;
                default:
                    _activationOptions[i] = ActivationFunction.ReLU; // Default to ReLU
                    _activationFunctions[i] = MatrixFunctions.ReLU;
                    break;
            }
        }
    }

    public void TrainWithDynamicRegularization(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize, double mixUpAlpha = 0.1, int cutMixBatchSize = 32, double cutMixAlpha = 0.3)
    {
        Random rand = new Random();

        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();

                // Apply MixUp or CutMix with equal probability. Neither helper
                // is defined anywhere in this build.
                if (rand.NextDouble() < 0.5)
                    MixUp(batchInputs, batchTargets, mixUpAlpha);
                else
                    CutMix(batchInputs, batchTargets, cutMixBatchSize, cutMixAlpha);

                for (int j = 0; j < batchInputs.Length; j++)
                {
                    FeedForward(batchInputs[j], true);
                    Backpropagation(batchInputs[j], batchTargets[j]);
                }
            }

            LearningRateScheduler(epoch);
        }
    }

    public class GroupNormalization
    {
        private int _numGroups;

        public GroupNormalization(int numGroups)
        {
            _numGroups = numGroups;
        }

        public Matrix GroupNormalize(Matrix x)
        {
            int channels = x.RowCount;
            int groupSize = channels / _numGroups;

            Matrix normalized = new Matrix(x.RowCount, x.ColumnCount);

            for (int i = 0; i < _numGroups; i++)
            {
                int start = i * groupSize;
                int end = (i + 1) * groupSize;

                Matrix group = x.GetSubMatrix(start, 0, end - start, x.ColumnCount);

                Matrix groupMean = MatrixFunctions.Mean(group, axis: 0);
                Matrix groupVariance = MatrixFunctions.Variance(group, axis: 0);

                for (int j = start; j < end; j++)
                {
                    for (int k = 0; k < x.ColumnCount; k++)
                    {
                        normalized[j, k] = (x[j, k] - groupMean[0, k]) / (Math.Sqrt(groupVariance[0, k]) + 1e-8);
                    }
                }
            }

            return normalized;
        }
    }

    private GroupNormalization _groupNormalization;

    // The original had a bare { } block here that read constructor parameters;
    // it is wrapped as a method the constructor now calls.
    private void InitializeGroupNormalization(bool useGroupNormalization, int numGroups)
    {
        if (useGroupNormalization)
        {
            _groupNormalization = new GroupNormalization(numGroups);
        }
    }

    private Matrix ApplyNormalization(Matrix input)
    {
        if (_groupNormalization != null)
        {
            return _groupNormalization.GroupNormalize(input);
        }
        else
        {
            return input;
        }
    }

    private Matrix OrthogonalInitialization(int rows, int cols, Random rand)
    {
        // Orthogonalize() is an assumed helper (e.g. QR-based) on the Matrix type.
        Matrix gaussianMatrix = Matrix.RandomMatrix(rows, cols, rand);
        Matrix orthogonalMatrix = gaussianMatrix.Orthogonalize();
        return orthogonalMatrix;
    }

    private double CyclicalLearningRate(int epoch, double min_lr, double max_lr, int step_size, double gamma)
    {
        // Cast to double: the original divided int by int, which truncates and
        // flattens the triangular wave.
        int cycle = (int)(1 + Math.Floor(epoch / (2.0 * step_size)));
        double x = Math.Abs((double)epoch / step_size - 2 * cycle + 1);
        double lr = min_lr + (max_lr - min_lr) * Math.Max(0, 1 - x) * Math.Pow(gamma, epoch);
        return lr;
    }
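
    // Worked example (editorial, illustrative numbers): with min_lr = 0.001,
    // max_lr = 0.01, step_size = 5, gamma = 0.9, epoch 0 gives x = 1 so
    // lr = min_lr; epoch 5 gives x = 0 so lr = 0.001 + 0.009 * 0.9^5 ≈ 0.0063;
    // by epoch 10 the wave returns to min_lr, with peaks decaying each cycle.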

    private double OneCyclePolicy(int epoch, double max_lr, int total_epochs, double pct_start = 0.3, double div_factor = 25.0, double final_div_factor = 1e4)
    {
        int phase_epoch = (int)(total_epochs * pct_start);
        int current_epoch = epoch + 1;

        if (current_epoch < phase_epoch)
        {
            // Cast to double: integer division here made pct collapse to zero.
            double pct = (double)current_epoch / phase_epoch;
            double lr = max_lr / div_factor * (1 + Math.Cos(Math.PI * pct)) / 2;
            return lr;
        }
        else
        {
            double pct = 1 - (double)(current_epoch - phase_epoch) / (total_epochs - phase_epoch);
            double lr = max_lr / final_div_factor * (1 + Math.Cos(Math.PI * pct)) / 2;
            return lr;
        }
    }
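
    // Editorial note: this is a loose variant of the one-cycle schedule. The
    // usual policy ramps up to max_lr over the first pct_start fraction of
    // training and anneals down afterwards; as written, the two phases only
    // produce cosine-shaped values scaled by max_lr / div_factor and
    // max_lr / final_div_factor respectively, so max_lr itself is never reached.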

    // Revision of the scheduler above; renamed so it does not collide with the
    // decay-based LearningRateScheduler(int) in the first fragment.
    private void CyclicalLearningRateScheduler(int epoch)
    {
        double min_lr = 0.001;  // Minimum learning rate
        double max_lr = 0.01;   // Maximum learning rate
        int step_size = 5;      // Step size for CLR
        double gamma = 0.9;     // Gamma for CLR

        _learningRate = CyclicalLearningRate(epoch, min_lr, max_lr, step_size, gamma);
    }
}

// DropBlock fragment; continued as a partial class. The duplicate
// _dropblockKeepProb/_dropblockSize fields were removed (they are declared in
// the first fragment), and the bare constructor block became a method.
public partial class GithubNeuralNetwork
{
    private void InitializeDropBlock(double dropblockKeepProb, int dropblockSize)
    {
        _dropblockKeepProb = dropblockKeepProb;
        _dropblockSize = dropblockSize;
    }

    private Matrix ApplyDropBlock(Matrix input, bool training)
    {
        if (!training || _dropblockKeepProb >= 1.0 || _dropblockSize <= 0)
            return input;

        Random rand = new Random();
        int height = input.RowCount;
        int width = input.ColumnCount;

        if (height < _dropblockSize || width < _dropblockSize)
            return input;

        int hStart = rand.Next(height - _dropblockSize + 1);
        int wStart = rand.Next(width - _dropblockSize + 1);
        int hEnd = hStart + _dropblockSize;
        int wEnd = wStart + _dropblockSize;

        // Zero out one contiguous block and rescale the survivors so the
        // expected activation stays constant. The original built the mask
        // inverted (keeping only the block) and multiplied by the input twice.
        Matrix mask = Matrix.Ones(height, width);
        mask.SetSubMatrix(hStart, hEnd, wStart, wEnd, 0.0);

        double kept = mask.Sum();
        if (kept == 0)
            return input;

        Matrix output = input.PointwiseMultiply(mask) * ((height * width) / kept);
        return output;
    }

    private Matrix FeedForwardWithDropBlock(Matrix input, bool training)
    {
        Matrix outputs = input;

        // Dev-build stub: only the DropBlock masks are applied per layer; the
        // affine transform and activations are omitted in this fragment.
        for (int i = 0; i < _weights.Length; i++)
        {
            outputs = ApplyDropBlock(outputs, training);
        }
        return outputs;
    }
}

// Pruning fragment; continued as a partial class.
public partial class GithubNeuralNetwork
{
    private void MagnitudeBasedWeightPruning(double pruningThreshold)
    {
        for (int i = 0; i < _weights.Length; i++)
        {
            // Keep only weights whose magnitude clears the threshold. The
            // original compared a Matrix to a double, which does not type-check.
            Matrix weightMask = _weights[i].Map(v => Math.Abs(v) >= pruningThreshold ? 1.0 : 0.0);
            _weights[i] = _weights[i].PointwiseMultiply(weightMask);
        }
    }

    public void TrainWithPruning(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize, double pruningThreshold)
    {
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();

                for (int j = 0; j < batchInputs.Length; j++)
                {
                    FeedForward(batchInputs[j], true);
                    Backpropagation(batchInputs[j], batchTargets[j]);
                }
            }
            LearningRateScheduler(epoch);
            MagnitudeBasedWeightPruning(pruningThreshold);
        }
    }
}

// Gradient-clipping and RMSProp fragment; continued as a partial class. The
// original also contained bare call statements at file scope; they are kept
// below as comments showing the intended call sites.
public partial class GithubNeuralNetwork
{
    private void ClipGradients(Matrix[] gradients)
    {
        double clipThreshold = 1.0;

        foreach (Matrix gradient in gradients)
        {
            // Clamp every element into [-clipThreshold, clipThreshold].
            gradient.MapInplace(x => Math.Min(x, clipThreshold));
            gradient.MapInplace(x => Math.Max(x, -clipThreshold));
        }
    }

    // Intended call sites (originally bare statements at file scope):
    // ClipGradients(gradientsWeights);
    // ClipGradients(gradientsBiases);
    // ClipGradients(gradientsGamma);
    // ClipGradients(gradientsBeta);

    // The original updated _weights no matter which parameter set the
    // gradients belonged to; the target array is now passed in explicitly.
    private void RMSPropOptimizer(Matrix[] parameters, Matrix[] gradients, ref Matrix[] velocities, double learningRate, double decayRate, double epsilon)
    {
        for (int i = 0; i < gradients.Length; i++)
        {
            velocities[i] = (decayRate * velocities[i]) + ((1 - decayRate) * (gradients[i] * gradients[i]));
            parameters[i] -= (learningRate / (MatrixFunctions.Sqrt(velocities[i]) + epsilon)) * gradients[i];
        }
    }

    // Intended call sites (originally bare statements at file scope):
    // RMSPropOptimizer(_weights, gradientsWeights, ref _vWeights, _learningRate, _decayRate, _epsilon);
    // RMSPropOptimizer(_biases, gradientsBiases, ref _vBiases, _learningRate, _decayRate, _epsilon);
    // RMSPropOptimizer(_gamma, gradientsGamma, ref _vGamma, _learningRate, _decayRate, _epsilon);
    // RMSPropOptimizer(_beta, gradientsBeta, ref _vBeta, _learningRate, _decayRate, _epsilon);
}

// Snapshot-ensemble fragment; continued as a partial class.
public partial class GithubNeuralNetwork
{
    public void SnapshotEnsembleTrain(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize, int numSnapshots)
    {
        List<GithubNeuralNetwork> snapshotModels = new List<GithubNeuralNetwork>();

        for (int snapshot = 0; snapshot < numSnapshots; snapshot++)
        {
            // The original repeated the constructor's parameter declarations
            // here instead of passing arguments; each snapshot now clones the
            // current network's configuration.
            GithubNeuralNetwork snapshotModel = new GithubNeuralNetwork(
                _initialLearningRate, _epsilon, _optimizer, _decayRate,
                _dropoutRate, _regularizer, _lambda, _layers);
            snapshotModels.Add(snapshotModel);

            for (int epoch = 0; epoch < epochs; epoch++)
            {
                for (int i = 0; i < inputs.Length; i += batchSize)
                {
                    Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                    Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();

                    for (int j = 0; j < batchInputs.Length; j++)
                    {
                        snapshotModel.FeedForward(batchInputs[j], true);
                        snapshotModel.Backpropagation(batchInputs[j], batchTargets[j]);
                    }
                }

                snapshotModel.LearningRateScheduler(epoch);
            }
        }

        Matrix[] ensemblePredictions = new Matrix[inputs.Length];

        for (int i = 0; i < inputs.Length; i++)
        {
            Matrix aggregatedPrediction = Matrix.Zeros(targets[0].RowCount, targets[0].ColumnCount);

            foreach (var snapshotModel in snapshotModels)
            {
                Matrix snapshotPrediction = snapshotModel.Predict(inputs[i]);
                aggregatedPrediction += snapshotPrediction;
            }

            aggregatedPrediction /= numSnapshots;
            ensemblePredictions[i] = aggregatedPrediction;
        }

        Matrix finalEnsemblePrediction = Matrix.Zeros(targets[0].RowCount, targets[0].ColumnCount);

        foreach (var prediction in ensemblePredictions)
        {
            finalEnsemblePrediction += prediction;
        }
        // NOTE: the averaged result is computed but never returned or stored
        // in this dev build.
        finalEnsemblePrediction /= inputs.Length;
    }
}

// RAdam fragment; continued as a partial class. The _sWeights/_rWeights/
// _sBiases/_rBiases fields and _beta1/_beta2 are declared in the first fragment.
public partial class GithubNeuralNetwork
{
    private void InitializeRAdamParameters()
    {
        for (int i = 0; i < _weights.Length; i++)
        {
            _sWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _rWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _sBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _rBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
        }
    }

    // NOTE: despite the name, this version is plain bias-corrected Adam on the
    // weights only; the RAdam rectification term is not implemented, and the
    // bias gradients are accepted but unused, as in the original.
    private void RAdamOptimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases)
    {
        for (int i = 0; i < _weights.Length; i++)
        {
            _sWeights[i] = (_beta1 * _sWeights[i]) + ((1 - _beta1) * gradientsWeights[i]);
            _rWeights[i] = (_beta2 * _rWeights[i]) + ((1 - _beta2) * gradientsWeights[i].PointwiseMultiply(gradientsWeights[i]));

            Matrix sHatWeights = _sWeights[i] / (1 - Math.Pow(_beta1, _t));
            Matrix rHatWeights = _rWeights[i] / (1 - Math.Pow(_beta2, _t));

            Matrix updateWeights = sHatWeights.PointwiseDivide(MatrixFunctions.Sqrt(rHatWeights) + _epsilon);

            _weights[i] -= _learningRate * updateWeights;
        }
    }

    // Dev sketch: the build intends Backpropagation to end by routing its
    // gradients through RAdamOptimizer, e.g.
    //     RAdamOptimizer(gradientsWeights, gradientsBiases);
    // The original redeclared Backpropagation here with undefined locals.

    private void LearningRateScheduler()
    {
        _iterations++;

        if (_iterations % (_stepSize * 2) == 0)
        {
            _cycle++;
            _iterations = 0;
        }

        double cycleFraction = Math.Abs(_iterations - _stepSize) / (_stepSize * 1.0);
        double newLearningRate = _baseLearningRate + (_maxLearningRate - _baseLearningRate) * Math.Max(0, 1 - cycleFraction);

        _learningRate = newLearningRate;
    }

    // Dev sketch: per-batch cyclical stepping. Renamed from Train (the original
    // signature repeated the constructor's parameters and referenced undefined
    // inputs).
    public void TrainWithCyclicalLR(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize)
    {
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                LearningRateScheduler();
            }
        }
    }
}

// Normalization fragment; continued as a partial class.
public partial class GithubNeuralNetwork
{
    private Matrix BatchNormalization(Matrix x, int layerIndex)
    {
        Matrix mean = MatrixFunctions.Mean(x, axis: 1);
        Matrix variance = MatrixFunctions.Variance(x, axis: 1);
        Matrix normalized = (x - mean) / MatrixFunctions.Sqrt(variance + _epsilon);
        Matrix scaled = _gamma[layerIndex] * normalized + _beta[layerIndex];
        return scaled;
    }

    private Matrix LayerNormalization(Matrix x, int layerIndex)
    {
        Matrix mean = MatrixFunctions.Mean(x, axis: 1);
        Matrix variance = MatrixFunctions.Variance(x, axis: 1);
        Matrix normalized = (x - mean) / MatrixFunctions.Sqrt(variance + _epsilon);
        Matrix scaled = _gamma[layerIndex] * normalized + _beta[layerIndex];
        return scaled;
    }

    // Renamed from FeedForward to avoid redefining the earlier method; this
    // stub only applies BatchNormalization per layer during training.
    private Matrix FeedForwardWithBatchNorm(Matrix input, bool training)
    {
        Matrix outputs = input;

        for (int i = 0; i < _weights.Length; i++)
        {
            if (training)
            {
                outputs = BatchNormalization(outputs, i);
            }
        }
        return outputs;
    }
}

// Initialization and activation fragment; continued as a partial class.
public partial class GithubNeuralNetwork
{
    private Matrix HeInitialization(int rows, int cols, Random rand)
    {
        double scale = Math.Sqrt(2.0 / rows);
        return Matrix.RandomMatrix(rows, cols, rand) * scale;
    }

    private Matrix VarianceScaling(int rows, int cols, Random rand)
    {
        double scale = Math.Sqrt(2.0 / (rows + cols));
        return Matrix.RandomMatrix(rows, cols, rand) * scale;
    }

    // Revision of InitializeWeightsAndBiases: picks the initializer per layer
    // from the activation choice. Renamed to avoid redefining the earlier method.
    private void InitializeWeightsAndBiasesV2()
    {
        Random rand = new Random();
        for (int i = 0; i < _weights.Length; i++)
        {
            if (_activationOptions[i] == ActivationFunction.ReLU)
            {
                _weights[i] = HeInitialization(_layers[i + 1], _layers[i], rand);
            }
            else
            {
                _weights[i] = VarianceScaling(_layers[i + 1], _layers[i], rand);
            }

            _biases[i] = Matrix.Zeros(_layers[i + 1], 1);
        }
    }

    private Matrix SELU(Matrix x)
    {
        double alpha = 1.6732632423543772848170429916717;
        double scale = 1.0507009873554804934193349852946;
        return x.Map(value => value > 0 ? scale * value : scale * (alpha * Math.Exp(value) - alpha));
    }

    private Matrix ApplyActivationFunction(Matrix x, ActivationFunction activation)
    {
        switch (activation)
        {
            case ActivationFunction.ReLU:
                return MatrixFunctions.ReLU(x);
            case ActivationFunction.Sigmoid:
                return MatrixFunctions.Sigmoid(x);
            case ActivationFunction.Tanh:
                return MatrixFunctions.Tanh(x);
            case ActivationFunction.LeakyReLU:
                return MatrixFunctions.LeakyReLU(x);
            case ActivationFunction.Swish:
                return Swish(x);
            case ActivationFunction.Mish:
                return Mish(x);
            case ActivationFunction.GELU:
                return GELU(x);
            case ActivationFunction.SELU:
                return SELU(x);
            default:
                return MatrixFunctions.ReLU(x);
        }
    }

    // Final revision: assigns only the enum tags and leaves dispatch to
    // ApplyActivationFunction. Renamed to avoid redefining SetActivationFunctions.
    private void SetActivationOptionsRandomly()
    {
        Random rand = new Random();
        for (int i = 0; i < _activationOptions.Length; i++)
        {
            // Next(10) covers every ActivationFunction member, SELU included.
            int choice = rand.Next(10);
            _activationOptions[i] = (ActivationFunction)choice;
        }
    }
}
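
// --- Usage sketch (editorial, not part of the original paste) ---
// A hypothetical driver showing how the constructor, Train, and Predict are
// meant to fit together; layer sizes and hyperparameters are made up, and it
// assumes the Matrix stand-in defined near the top of this file.
public static class Program
{
    public static void Main()
    {
        var net = new GithubNeuralNetwork(
            learningRate: 0.01, epsilon: 1e-8, optimizer: "Adam", decayRate: 0.01,
            dropoutRate: 0.2, regularizer: Regularizer.L2, lambda: 1e-4,
            layers: new[] { 4, 16, 3 });

        // Toy data: eight random 1x4 inputs mapped to random 1x3 targets.
        var rand = new Random(0);
        Matrix[] inputs = Enumerable.Range(0, 8)
            .Select(_ => Matrix.RandomMatrix(1, 4, rand)).ToArray();
        Matrix[] targets = Enumerable.Range(0, 8)
            .Select(_ => Matrix.RandomMatrix(1, 3, rand)).ToArray();

        net.Train(inputs, targets, epochs: 10, batchSize: 4);
        Matrix prediction = net.Predict(inputs[0]);
    }
}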