Advertisement
Guest User

Untitled

a guest
Sep 16th, 2019
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Rust 15.99 KB | None | 0 0
  1. mod core {
  2.     extern crate nalgebra;
  3.     use nalgebra::{DMatrix, DVector};
  4.     use std::time::{Duration, Instant};
  5.     use rand::prelude::SliceRandom;
  6.     use rand::Rng;
  7.     use rand::distributions::uniform::UniformFloat;
  8.     use self::nalgebra::{Matrix, Vector};
  9.  
  10.     // A simple stochastic/incremental descent neural network.
  11.  
    // Network state. Layer sizes are implied by the field dimensions:
    // `neurons[i]` holds the activations of layer i, `connections[i]` is the
    // weight matrix mapping layer i -> layer i+1, and `biases[i]` is the bias
    // vector of layer i+1 — so `biases` and `connections` each have one fewer
    // entry than `neurons`.
    pub struct NeuralNetwork {
        neurons: Vec<DVector<f64>>,
        biases: Vec<DVector<f64>>,
        connections: Vec<DMatrix<f64>>
    }
  17.  
  18.     impl NeuralNetwork {
  19.         pub fn new(layers: &[usize]) -> NeuralNetwork {
  20.             if layers.len() < 2 {
  21.                 panic!("Requires >1 layers");
  22.             }
  23.             for &x in layers {
  24.                 if x < 1usize {
  25.                     panic!("All layer sizes must be >0");
  26.                 }
  27.             }
  28.             let mut neurons: Vec<DVector<f64>> = Vec::with_capacity(layers.len());
  29.             let mut connections: Vec<DMatrix<f64>> = Vec::with_capacity(layers.len() - 1);
  30.             let mut biases: Vec<DVector<f64>> = Vec::with_capacity(layers.len() - 1);
  31.  
  32.             let mut rng = rand::thread_rng();
  33.  
  34.             neurons.push(DVector::repeat(layers[0],0f64));
  35.             for i in 1..layers.len() {
  36.                 neurons.push(DVector::repeat(layers[i],0f64));
  37.                 connections.push(DMatrix::new_random(layers[i],layers[i-1]));
  38.                 biases.push(DVector::new_random(layers[i]));
  39.             }
  40.             NeuralNetwork{ neurons, biases, connections }
  41.         }
  42.  
  43.         // Feeds forward through network
  44.         pub fn run(&mut self, inputs:&[f64]) -> &DVector<f64> {
  45.  
  46.             if inputs.len() != self.neurons[0].len() {
  47.                 panic!("Wrong number of inputs: {} given, {} required",inputs.len(),self.neurons[0].len());
  48.             }
  49.  
  50.             self.neurons[0] = DVector::from_vec(inputs.to_vec()); // TODO Look into improving this
  51.             for i in 0..self.connections.len() {
  52.                 // TODO Look into difference between '... .clone()' and '& ...' in this case (rn I think this just stops using move semantics)
  53.  
  54.                 let temp = (&self.connections[i] * &self.neurons[i])+ &self.biases[i];
  55.                 self.neurons[i+1] = self.sigmoid_mapping(&temp);
  56.             }
  57.  
  58.             &self.neurons[self.neurons.len() - 1] // TODO Look into removing this
  59.         }
  60.         // Trains the network
  61.         pub fn train(&mut self, examples:&mut [(Vec<f64>,Vec<f64>)], duration:i32, log_interval:i32, batch_size:usize, learning_rate:f64, test_data:&[(Vec<f64>,Vec<f64>)]) -> () {
  62.             let mut rng = rand::thread_rng();
  63.             let mut iterations_elapsed = 0i32;
  64.             let starting_evaluation = self.evaluate(test_data);
  65.             loop {
  66.                 if iterations_elapsed == duration { break; }
  67.  
  68.                 if iterations_elapsed % log_interval == 0 && iterations_elapsed != 0 {
  69.                     let evaluation = self.evaluate(test_data);
  70.                     println!("Iteration: {}, Cost: {:.7}, Classified: {}/{}",iterations_elapsed,evaluation.0,evaluation.1,examples.len());
  71.                 }
  72.  
  73.                 examples.shuffle(&mut rng);
  74.                 let batches = get_batches(examples,batch_size);
  75.  
  76.                 for batch in batches {
  77.                     self.update_batch(batch,learning_rate);
  78.                 }
  79.  
  80.                 iterations_elapsed += 1;
  81.             }
  82.             let evaluation = self.evaluate(test_data);
  83.             println!("Iteration: {}, Cost: {:.7}, Classified: {}/{}",iterations_elapsed,evaluation.0,evaluation.1,examples.len());
  84.             println!("Cost: {:.7} -> {:.7}",starting_evaluation.0,evaluation.0);
  85.             println!("Classified: {:.7} -> {:.7}",starting_evaluation.1,evaluation.1);
  86.             println!("Cost: {:.7}",evaluation.0-starting_evaluation.0);
  87.             println!("Classified: +{:.7}",evaluation.1-starting_evaluation.1);
  88.             fn get_batches(examples:&[(Vec<f64>,Vec<f64>)], batch_size: usize) -> Vec<&[(Vec<f64>,Vec<f64>)]> {
  89.                 let mut batches = Vec::new(); // TODO Look into if 'Vec::with_capacity(ceil(examples.len() / batch_size))' is more efficient
  90.  
  91.                 let mut lower_bound = 0usize;
  92.                 let mut upper_bound = batch_size;
  93.                 while upper_bound < examples.len() {
  94.                     batches.push(&examples[lower_bound..upper_bound]);
  95.                     lower_bound = upper_bound;
  96.                     // TODO Improve this to remove last unnecessary addition to 'upper_bound'
  97.                     upper_bound += batch_size;
  98.                 }
  99.                 // Accounts for last batch possibly being under 'batch_size'
  100.                 batches.push(&examples[lower_bound..examples.len()]);
  101.  
  102.                 batches
  103.             }
  104.  
  105.         }
  106.  
  107.         fn update_batch(&mut self, batch: &[(Vec<f64>, Vec<f64>)], eta: f64) -> () {
  108.             // Copies structure of self.neurons and self.connections with values of 0f64
  109.             // TODO Look into a better way to setup 'bias_nabla' and 'weight_nabla'
  110.             // TODO Better understand what 'nabla' means
  111.  
  112.             let mut clone_holder_b:Vec<DVector<f64>> = self.neurons.clone().iter().map(|x| x.map(|y| -> f64 { 0f64 }) ).collect();
  113.             clone_holder_b.remove(0);
  114.             let clone_holder_w:Vec<DMatrix<f64>> = self.connections.clone().iter().map(|x| x.map(|y| -> f64 { 0f64 }) ).collect();
  115.             let mut nabla_b:Vec<DVector<f64>> = clone_holder_b.clone();
  116.             let mut nabla_w:Vec<DMatrix<f64>> = clone_holder_w.clone();
  117.  
  118.             for example in batch {
  119.                 let (delta_nabla_w,delta_nabla_b):(Vec<DMatrix<f64>>,Vec<DVector<f64>>) =
  120.                     self.backpropagate(example,clone_holder_w.clone(),clone_holder_b.clone());
  121.  
  122.                 // Sums values (matrices) in each index together
  123.                 nabla_w = nabla_w.iter().zip(delta_nabla_w).map(|(x,y)|x + y).collect();
  124.                 nabla_b = nabla_b.iter().zip(delta_nabla_b).map(|(x,y)| x + y).collect();
  125.             }
  126.  
  127.             // TODO Check if these lines could be done via matrix multiplication
  128.             self.connections = self.connections.iter().zip(nabla_w.clone()).map(
  129.                 | (w,nw) |
  130.                     w - (nw * (eta / batch.len() as f64))
  131.             ).collect();
  132.             self.biases = self.biases.iter().zip(nabla_b.clone()).map(
  133.                 | (b,nb) |
  134.                     b - (nb * (eta / batch.len() as f64))
  135.             ).collect();
  136.         }
  137.  
        /// Computes the gradient of the quadratic cost for a single `example`
        /// via backpropagation, writing it into the zeroed `nabla_w`/`nabla_b`
        /// buffers supplied by the caller and returning them.
        ///
        /// Takes `&mut self` because it calls `run`, which overwrites the
        /// stored activations; those activations are then read back layer by
        /// layer below.
        fn backpropagate(
            &mut self,
            example:&(Vec<f64>,Vec<f64>),
            mut nabla_w:Vec<DMatrix<f64>>,
            mut nabla_b:Vec<DVector<f64>>
        ) -> (Vec<DMatrix<f64>>,Vec<DVector<f64>>) {

            // Forward pass; cloned so `self` can be re-borrowed below.
            let output = self.run(&example.0).clone();

            let target = DVector::from_vec(example.1.clone());

            let last_index = self.connections.len()-1; // = nabla_b.len()-1 = nabla_w.len()-1 = self.neurons.len()-2 = self.connections.len()-1

            // Output-layer error: delta = sigma'(output) ⊙ (output - target).
            // `sigmoid_prime_mapping` works on activations, i.e. values that
            // have already been through the sigmoid — which `output` has.
            let mut delta:DVector<f64> = self.sigmoid_prime_mapping(&output).component_mul(&cost_derivative(output,target));// Not cloning 'output' here might cause issues if 'sigmoid_prime_mapping(...)' is done 2nd after 'output' has been moved

            // Gradient w.r.t. the last weight matrix is the outer product of
            // delta with the previous layer's activations (neurons[last_index]
            // is the second-to-last layer, since neurons has one more entry
            // than connections).
            nabla_b[last_index] = delta.clone();
            nabla_w[last_index] = delta.clone() * self.neurons[last_index].transpose();

            // Propagate the error backwards through the hidden layers
            // (i indexes layers; i-1 indexes the matching nabla slot).
            for i in (1..self.neurons.len()-1).rev() {

                delta = self.sigmoid_prime_mapping(&self.neurons[i]).component_mul( // Might need to clone 'self.neurons[i]' here
                    &(self.connections[i].transpose() * delta)
                );

                nabla_b[i-1] = delta.clone();
                nabla_w[i-1] = delta.clone() * self.neurons[i-1].transpose();
            }

            return (nabla_w,nabla_b);

            // d(cost)/d(output) for the quadratic cost: output - target.
            fn cost_derivative(output:DVector<f64>,target:DVector<f64>) -> DVector<f64> {
                output-target
            }
        }
  175.  
  176.         // Returns tuple (average cost, number of examples correctly identified)
  177.         pub fn evaluate(&mut self, test_data:&[(Vec<f64>,Vec<f64>)]) -> (f64,u32) {
  178.             let mut correctly_classified = 0u32;
  179.             let mut return_cost = 0f64;
  180.             for example in test_data {
  181.                 let out = self.run(&example.0);
  182.                 let expected = DVector::from_vec(example.1.clone());
  183.                 return_cost += cost(out,&expected);
  184.  
  185.                 if get_max_index(out) == get_max_index(&expected) {
  186.                     correctly_classified += 1u32;
  187.                 }
  188.             }
  189.             return (return_cost / test_data.len() as f64, correctly_classified);
  190.  
  191.             // Returns index of max value
  192.             fn get_max_index(vector:&DVector<f64>) -> usize{
  193.                 let mut max_index = 0usize;
  194.                 for i in 1..vector.len() {
  195.                     if vector[i] > vector[max_index]  {
  196.                         max_index = i;
  197.                     }
  198.                 }
  199.                 return max_index;
  200.             }
  201.  
  202.             fn cost(outputs: &DVector<f64>, targets: &DVector<f64>) -> f64 {
  203.                 // TODO This could probably be 1 line, look into that
  204.                 let error_vector = targets.clone() - outputs.clone();// TODO Look into removing '.clone()'s here
  205.                 let cost_vector = error_vector.component_mul(&error_vector);
  206.                 return cost_vector.mean();
  207.             }
  208.         }
  209.  
  210.         // Assumes y has already had 'sigmoid_mapping(...)' run
  211.         fn sigmoid_prime_mapping(&self,y: &DVector<f64>) -> DVector<f64> {
  212.             y.map(|x| -> f64 { x * (1f64 - x) })
  213.         }
  214.         fn sigmoid_mapping(&self,y: &DVector<f64>) -> DVector<f64>{
  215.             y.map(|x| -> f64 { self.sigmoid(x) })
  216.             // TODO I like simplifying to a map function, but this feels bad, look into it
  217.         }
  218.         fn sigmoid(&self,y: f64) -> f64 {
  219.             1f64 / (1f64 + (-y).exp())
  220.         }
  221.  
  222.     }
  223. }
  224.  
  225. // TODO Look into how to name tests
  226. // TODO Look into using 'debug_assert's instead
  227. #[cfg(test)]
  228. mod tests {
  229.  
  230.     extern crate nalgebra;
  231.     use nalgebra::DVector;
  232.     use std::fs::File;
  233.     use std::io::{Read};
  234.     use std::time::Duration;
  235.     use crate::core::NeuralNetwork;
  236.  
    // Maximum mean cost for a trained network to count as having learned.
    // NOTE(review): `01f64` is just 1.0, but the trailing comment claims
    // "1% inaccuracy" — 0.01 may have been intended; confirm before changing.
    const TESTING_MIN_COST:f64 = 01f64; // 1% inaccuracy
  239.  
  240.     #[test]
  241.     fn new() {
  242.         crate::core::NeuralNetwork::new(&[2,3,1]);
  243.     }
  244.     #[test]
  245.     #[should_panic(expected="Requires >1 layers")]
  246.     fn new_few_layers() {
  247.         crate::core::NeuralNetwork::new(&[2]);
  248.     }
  249.     #[test]
  250.     #[should_panic(expected="All layer sizes must be >0")]
  251.     fn new_small_layers_0() {
  252.         crate::core::NeuralNetwork::new(&[0,3,1]);
  253.     }
  254.     #[test]
  255.     #[should_panic(expected="All layer sizes must be >0")]
  256.     fn new_small_layers_1() {
  257.         crate::core::NeuralNetwork::new(&[2,0,1]);
  258.     }
  259.     #[test]
  260.     #[should_panic(expected="All layer sizes must be >0")]
  261.     fn new_small_layers_2() {
  262.         crate::core::NeuralNetwork::new(&[2,3,0]);
  263.     }
  264.  
  265.     #[test]
  266.     fn run_0() {
  267.         let mut neural_network = crate::core::NeuralNetwork::new(&[1,1]);
  268.         assert_eq!(neural_network.run(&vec![1f64]).len(),1usize);
  269.     }
  270.     #[test]
  271.     fn run_1() {
  272.         let mut neural_network = crate::core::NeuralNetwork::new(&[2,3]);
  273.         assert_eq!(neural_network.run(&vec![1f64,0f64]).len(),3usize);
  274.     }
  275.     #[test]
  276.     fn run_2() {
  277.         let mut neural_network = crate::core::NeuralNetwork::new(&[2,3,1]);
  278.         assert_eq!(neural_network.run(&vec![1f64,0f64]).len(),1usize);
  279.     }
  280.     #[test]
  281.     #[should_panic(expected="Wrong number of inputs: 1 given, 2 required")]
  282.     fn run_inputs_wrong_0() {
  283.         let mut neural_network = crate::core::NeuralNetwork::new(&[2,3,1]);
  284.         neural_network.run(&vec![1f64]);
  285.     }
  286.     #[test]
  287.     #[should_panic(expected="Wrong number of inputs: 3 given, 2 required")]
  288.     fn run_inputs_wrong_1() {
  289.         let mut neural_network = crate::core::NeuralNetwork::new(&[2,3,1]);
  290.         neural_network.run(&vec![1f64,1f64,0f64]);
  291.     }
  292.  
  293.     // Tests network to learn an XOR gate.
  294.     #[test]
  295.     fn train_0() {
  296.         let mut neural_network = crate::core::NeuralNetwork::new(&[2,3,4,2]);
  297.         let mut examples = [
  298.             (vec![0f64,0f64],vec![0f64,1f64]),
  299.             (vec![1f64,0f64],vec![1f64,0f64]),
  300.             (vec![0f64,1f64],vec![1f64,0f64]),
  301.             (vec![1f64,1f64],vec![0f64,1f64])
  302.         ];
  303.         let test_data = examples.clone();
  304.         neural_network.train(&mut examples,4000,400,4usize,2f64,&test_data);
  305.  
  306.         let evalutation = neural_network.evaluate(&examples);
  307.         assert!(evalutation.0 < TESTING_MIN_COST);
  308.         assert_eq!(evalutation.1,examples.len() as u32);
  309.         //assert!(false);
  310.     }
  311.  
  312.     // Tests network to recognize handwritten digits of 28x28 pixels
  313.     #[test]
  314.     fn train_1() {
  315.         let mut neural_network = crate::core::NeuralNetwork::new(&[784,30,10]);
  316.  
  317.         let mut training_examples = get_examples(false);
  318.         let testing_examples = get_examples(true);
  319.  
  320.  
  321.         neural_network.train(&mut training_examples, 30, 1, 10usize, 3f64, &testing_examples);
  322.  
  323.         let evaluation = neural_network.evaluate(&testing_examples);
  324.         // TODO This line and function is broken, takes ages.
  325.         assert!(evaluation.0 < TESTING_MIN_COST);
  326.  
  327.         assert!(false);
  328.  
  329.         // TODO Add IO error checking
  330.         fn get_examples(testing:bool) -> Vec<(Vec<f64>,Vec<f64>)> {
  331.             let (mut images,mut labels) = if testing {
  332.                 (
  333.                     get_images("data/MNIST/t10k-images.idx3-ubyte"),
  334.                     get_labels("data/MNIST/t10k-labels.idx1-ubyte")
  335.                 )
  336.             } else {
  337.                 (
  338.                     get_images("data/MNIST/train-images.idx3-ubyte"),
  339.                     get_labels("data/MNIST/train-labels.idx1-ubyte")
  340.                 )
  341.             };
  342.             //images = images[0..10000].to_vec();// TODO REMOVE, THIS FOR DEBUGGING
  343.             //labels = labels[0..10000].to_vec();// TODO REMOVE, THIS FOR DEBUGGING
  344.             let iterator = images.iter().zip(labels.iter());
  345.             let mut examples = Vec::new();
  346.             let set_output_layer = |label:u8| -> Vec<f64> { let mut temp = vec!(0f64;10); temp[label as usize] = 1f64; temp};
  347.             for (image,label) in iterator {
  348.                 examples.push(
  349.                     (
  350.                         image.clone(),
  351.                         set_output_layer(*label)
  352.                     )
  353.                 );
  354.             }
  355.             return examples;
  356.  
  357.             fn get_labels(path:&str) -> Vec<u8> {
  358.                 let mut file = File::open(path).unwrap();
  359.                 let mut label_buffer = Vec::new();
  360.                 file.read_to_end(&mut label_buffer);
  361.  
  362.                 // TODO Look into better ways to remove the 1st 7 elements
  363.                 label_buffer.drain(8..).collect()
  364.             }
  365.  
  366.             fn get_images(path:&str) -> Vec<Vec<f64>> {
  367.                 let mut file = File::open(path).unwrap();
  368.                 let mut image_buffer_u8 = Vec::new();
  369.                 file.read_to_end(&mut image_buffer_u8);
  370.                 // Removes 1st 16 bytes
  371.                 image_buffer_u8 = image_buffer_u8.drain(16..).collect();
  372.  
  373.                 // Converts from u8 to f64
  374.                 let mut image_buffer_f64 = Vec::new();
  375.                 for pixel in image_buffer_u8 {
  376.                     image_buffer_f64.push(pixel as f64 / 255f64);
  377.                 }
  378.  
  379.                 // Splits buffer into vectors for each image
  380.                 let mut images_vector = Vec::new();
  381.                 for i in (0..image_buffer_f64.len() / (28 * 28)).rev() {
  382.                     images_vector.push(image_buffer_f64.split_off(i * 28 * 28));
  383.                 }
  384.                 // Does splitting in reverse order due to how '.split_off' works, so reverses back to original
  385.                 // order.
  386.                 images_vector.reverse();
  387.                 images_vector
  388.             }
  389.         }
  390.     }
  391. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement