Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- use super::Matrix;
- use super::Network;
- use super::Vector;
- use std::cell::RefCell;
- use std::rc::Rc;
- pub struct Layer {
- vector: Rc<RefCell<Vector>>,
- gradient: Rc<RefCell<Vector>>,
- }
- pub struct Weight {
- matrix: Rc<RefCell<Matrix>>,
- deltas: Rc<RefCell<Matrix>>,
- }
- pub struct Kernel {
- input: Layer,
- output: Layer,
- weight: Weight,
- }
- struct TrainerOptions {
- step: f32,
- momentum: f32,
- }
- /// Stochastic Gradient Descent Trainer. This trainer implements a simple
- /// SGD back propagation algorithm on the given network. Provides a proxy
- /// forward() function to the underlying network. Training done on the
- /// backward() function.
- pub struct Trainer {
- network: Network,
- kernels: Vec<Kernel>,
- options: TrainerOptions,
- }
- impl Trainer {
- pub fn new(network: Network) -> Trainer {
- let options = TrainerOptions {
- step: 0.15,
- momentum: 0.005,
- };
- let gradients = network
- .vectors
- .iter()
- .map(|v| {
- let v = v.borrow();
- Rc::new(RefCell::new(Vector::new(v.width)))
- })
- .collect::<Vec<_>>();
- let deltas = network
- .matrices
- .iter()
- .map(|m| {
- let m = m.borrow();
- Rc::new(RefCell::new(Matrix::new(m.width, m.height)))
- })
- .collect::<Vec<_>>();
- let kernels = (0..network.matrices.len())
- .map(|n| Kernel {
- input: Layer {
- vector: network.vectors[n + 0].clone(),
- gradient: gradients[n + 0].clone(),
- },
- output: Layer {
- vector: network.vectors[n + 1].clone(),
- gradient: gradients[n + 1].clone(),
- },
- weight: Weight {
- matrix: network.matrices[n + 0].clone(),
- deltas: deltas[n + 0].clone(),
- },
- })
- .collect::<Vec<_>>();
- Trainer {
- kernels,
- network,
- options,
- }
- }
- pub fn forward(&mut self, input: Vec<f32>) -> Vec<f32> {
- self.network.forward(input)
- }
- fn derive(&self, x: f32) -> f32 {
- 1.0 - x * x
- }
- pub fn backward(&mut self, input: Vec<f32>, expect: Vec<f32>) {
- // phase 0: execute the network, write to output layer.
- self.network.forward(input);
- // phase 1: calculate output layer gradients.
- let kernel = self.kernels.last().unwrap();
- for o in 0..kernel.weight.matrix.borrow().height {
- let delta = expect[o] - kernel.output.vector.borrow()[o];
- let value = delta * self.derive(kernel.output.vector.borrow()[0]);
- kernel.output.gradient.borrow_mut()[o] = value;
- }
- // phase 2: calculate gradients on hidden layers.
- for k in (0..self.kernels.len()).rev() {
- let kernel = &self.kernels[k];
- let mut input_gradient = kernel.input.gradient.borrow_mut();
- let output_gradient = kernel.output.gradient.borrow();
- let matrix = kernel.weight.matrix.borrow();
- input_gradient.set(matrix.backward(&output_gradient).data);
- input_gradient.derive(|x| x * self.derive(*x));
- }
- // phase 3: gradient decent on the weights.
- for k in (0..self.kernels.len()).rev() {
- let kernel = &self.kernels[k];
- let mut matrix = kernel.weight.matrix.borrow_mut();
- let mut deltas = kernel.weight.deltas.borrow_mut();
- let input_vector = kernel.input.vector.borrow();
- let output_gradient = kernel.output.gradient.borrow();
- for i in 0..matrix.width {
- for o in 0..matrix.height {
- let old_delta = deltas[(i, o)];
- let new_delta = (self.options.step * input_vector[i] * output_gradient[o])
- + (self.options.momentum * old_delta);
- let new_weight = matrix[(i, o)] + new_delta;
- matrix[(i, o)] = new_weight;
- deltas[(i, o)] = new_delta;
- }
- }
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement