Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- mod core {
- extern crate nalgebra;
- use nalgebra::{DMatrix, DVector};
- use std::time::{Duration, Instant};
- use rand::prelude::SliceRandom;
- use rand::Rng;
- use rand::distributions::uniform::UniformFloat;
- use self::nalgebra::{Matrix, Vector};
// A simple stochastic/incremental descent neural network.
pub struct NeuralNetwork {
    // Activation values for every layer; neurons[0] is the input layer,
    // the last entry is the output layer. Overwritten by each `run(...)`.
    neurons: Vec<DVector<f64>>,
    // Bias vectors for layers 1..n (the input layer has no biases),
    // so biases.len() == neurons.len() - 1.
    biases: Vec<DVector<f64>>,
    // Weight matrices; connections[i] is (len(layer i+1) x len(layer i)) and
    // maps layer i's activations forward to layer i+1.
    connections: Vec<DMatrix<f64>>
}
- impl NeuralNetwork {
- pub fn new(layers: &[usize]) -> NeuralNetwork {
- if layers.len() < 2 {
- panic!("Requires >1 layers");
- }
- for &x in layers {
- if x < 1usize {
- panic!("All layer sizes must be >0");
- }
- }
- let mut neurons: Vec<DVector<f64>> = Vec::with_capacity(layers.len());
- let mut connections: Vec<DMatrix<f64>> = Vec::with_capacity(layers.len() - 1);
- let mut biases: Vec<DVector<f64>> = Vec::with_capacity(layers.len() - 1);
- let mut rng = rand::thread_rng();
- neurons.push(DVector::repeat(layers[0],0f64));
- for i in 1..layers.len() {
- neurons.push(DVector::repeat(layers[i],0f64));
- connections.push(DMatrix::new_random(layers[i],layers[i-1]));
- biases.push(DVector::new_random(layers[i]));
- }
- NeuralNetwork{ neurons, biases, connections }
- }
- // Feeds forward through network
- pub fn run(&mut self, inputs:&[f64]) -> &DVector<f64> {
- if inputs.len() != self.neurons[0].len() {
- panic!("Wrong number of inputs: {} given, {} required",inputs.len(),self.neurons[0].len());
- }
- self.neurons[0] = DVector::from_vec(inputs.to_vec()); // TODO Look into improving this
- for i in 0..self.connections.len() {
- // TODO Look into difference between '... .clone()' and '& ...' in this case (rn I think this just stops using move semantics)
- let temp = (&self.connections[i] * &self.neurons[i])+ &self.biases[i];
- self.neurons[i+1] = self.sigmoid_mapping(&temp);
- }
- &self.neurons[self.neurons.len() - 1] // TODO Look into removing this
- }
- // Trains the network
- pub fn train(&mut self, examples:&mut [(Vec<f64>,Vec<f64>)], duration:i32, log_interval:i32, batch_size:usize, learning_rate:f64, test_data:&[(Vec<f64>,Vec<f64>)]) -> () {
- let mut rng = rand::thread_rng();
- let mut iterations_elapsed = 0i32;
- let starting_evaluation = self.evaluate(test_data);
- loop {
- if iterations_elapsed == duration { break; }
- if iterations_elapsed % log_interval == 0 && iterations_elapsed != 0 {
- let evaluation = self.evaluate(test_data);
- println!("Iteration: {}, Cost: {:.7}, Classified: {}/{}",iterations_elapsed,evaluation.0,evaluation.1,test_data.len());
- }
- examples.shuffle(&mut rng);
- let batches = get_batches(examples,batch_size);
- for batch in batches {
- self.update_batch(batch,learning_rate);
- }
- iterations_elapsed += 1;
- }
- let evaluation = self.evaluate(test_data);
- println!("Iteration: {}, Cost: {:.7}, Classified: {}/{}",iterations_elapsed,evaluation.0,evaluation.1,test_data.len());
- println!("Cost: {:.7} -> {:.7}",starting_evaluation.0,evaluation.0);
- println!("Classified: {:.7} -> {:.7}",starting_evaluation.1,evaluation.1);
- println!("Cost: {:.7}",evaluation.0-starting_evaluation.0);
- println!("Classified: +{:.7}",evaluation.1-starting_evaluation.1);
- fn get_batches(examples:&[(Vec<f64>,Vec<f64>)], batch_size: usize) -> Vec<&[(Vec<f64>,Vec<f64>)]> {
- let mut batches = Vec::new(); // TODO Look into if 'Vec::with_capacity(ceil(examples.len() / batch_size))' is more efficient
- let mut lower_bound = 0usize;
- let mut upper_bound = batch_size;
- while upper_bound < examples.len() {
- batches.push(&examples[lower_bound..upper_bound]);
- lower_bound = upper_bound;
- // TODO Improve this to remove last unnecessary addition to 'upper_bound'
- upper_bound += batch_size;
- }
- // Accounts for last batch possibly being under 'batch_size'
- batches.push(&examples[lower_bound..examples.len()]);
- batches
- }
- }
- fn update_batch(&mut self, batch: &[(Vec<f64>, Vec<f64>)], eta: f64) -> () {
- // Copies structure of self.neurons and self.connections with values of 0f64
- // TODO Look into a better way to setup 'bias_nabla' and 'weight_nabla'
- // TODO Better understand what 'nabla' means
- let mut clone_holder_b:Vec<DVector<f64>> = self.neurons.clone().iter().map(|x| x.map(|y| -> f64 { 0f64 }) ).collect();
- clone_holder_b.remove(0);
- let clone_holder_w:Vec<DMatrix<f64>> = self.connections.clone().iter().map(|x| x.map(|y| -> f64 { 0f64 }) ).collect();
- let mut nabla_b:Vec<DVector<f64>> = clone_holder_b.clone();
- let mut nabla_w:Vec<DMatrix<f64>> = clone_holder_w.clone();
- for example in batch {
- let (delta_nabla_w,delta_nabla_b):(Vec<DMatrix<f64>>,Vec<DVector<f64>>) =
- self.backpropagate(example,clone_holder_w.clone(),clone_holder_b.clone());
- // Sums values (matrices) in each index together
- nabla_w = nabla_w.iter().zip(delta_nabla_w).map(|(x,y)|x + y).collect();
- nabla_b = nabla_b.iter().zip(delta_nabla_b).map(|(x,y)| x + y).collect();
- }
- // TODO Check if these lines could be done via matrix multiplication
- self.connections = self.connections.iter().zip(nabla_w.clone()).map(
- | (w,nw) |
- w - (nw * (eta / batch.len() as f64))
- ).collect();
- self.biases = self.biases.iter().zip(nabla_b.clone()).map(
- | (b,nb) |
- b - (nb * (eta / batch.len() as f64))
- ).collect();
- }
/// Computes one example's gradients (dC/dw, dC/db) via backpropagation.
///
/// `nabla_w`/`nabla_b` are zero-initialised accumulators shaped like the
/// network's weights and biases; they are filled in and returned. A forward
/// pass is run first so `self.neurons` holds this example's activations.
fn backpropagate(
    &mut self,
    example:&(Vec<f64>,Vec<f64>),
    mut nabla_w:Vec<DMatrix<f64>>,
    mut nabla_b:Vec<DVector<f64>>
) -> (Vec<DMatrix<f64>>,Vec<DVector<f64>>) {
    // Forward pass; cloned because `self` is mutably borrowed again below.
    let output = self.run(&example.0).clone();
    let target = DVector::from_vec(example.1.clone());
    let last_index = self.connections.len()-1; // = nabla_b.len()-1 = nabla_w.len()-1 = self.neurons.len()-2 = self.connections.len()-1
    // Output-layer error: sigmoid'(output) (element-wise *) (output - target).
    // sigmoid_prime_mapping expects already-activated values, which `output` is.
    let mut delta:DVector<f64> = self.sigmoid_prime_mapping(&output).component_mul(&cost_derivative(output,target));// Not cloning 'output' here might cause issues if 'sigmoid_prime_mapping(...)' is done 2nd after 'output' has been moved
    nabla_b[last_index] = delta.clone();
    // dC/dw for a layer is the outer product: delta * (previous activations)^T.
    nabla_w[last_index] = delta.clone() * self.neurons[last_index].transpose();
    // Walk the hidden layers backwards, propagating the error through the
    // transposed weight matrices.
    for i in (1..self.neurons.len()-1).rev() {
        delta = self.sigmoid_prime_mapping(&self.neurons[i]).component_mul( // Might need to clone 'self.neurons[i]' here
            &(self.connections[i].transpose() * delta)
        );
        nabla_b[i-1] = delta.clone();
        nabla_w[i-1] = delta.clone() * self.neurons[i-1].transpose();
    }
    return (nabla_w,nabla_b);
    // Derivative of the quadratic cost with respect to the outputs.
    fn cost_derivative(output:DVector<f64>,target:DVector<f64>) -> DVector<f64> {
        output-target
    }
}
- // Returns tuple (average cost, number of examples correctly identified)
- pub fn evaluate(&mut self, test_data:&[(Vec<f64>,Vec<f64>)]) -> (f64,u32) {
- let mut correctly_classified = 0u32;
- let mut return_cost = 0f64;
- for example in test_data {
- let out = self.run(&example.0);
- let expected = DVector::from_vec(example.1.clone());
- return_cost += cost(out,&expected);
- if get_max_index(out) == get_max_index(&expected) {
- correctly_classified += 1u32;
- }
- }
- return (return_cost / test_data.len() as f64, correctly_classified);
- // Returns index of max value
- fn get_max_index(vector:&DVector<f64>) -> usize{
- let mut max_index = 0usize;
- for i in 1..vector.len() {
- if vector[i] > vector[max_index] {
- max_index = i;
- }
- }
- return max_index;
- }
- fn cost(outputs: &DVector<f64>, targets: &DVector<f64>) -> f64 {
- // TODO This could probably be 1 line, look into that
- let error_vector = targets.clone() - outputs.clone();// TODO Look into removing '.clone()'s here
- let cost_vector = error_vector.component_mul(&error_vector);
- return cost_vector.mean();
- }
- }
- // Assumes y has already had 'sigmoid_mapping(...)' run
- fn sigmoid_prime_mapping(&self,y: &DVector<f64>) -> DVector<f64> {
- y.map(|x| -> f64 { x * (1f64 - x) })
- }
- fn sigmoid_mapping(&self,y: &DVector<f64>) -> DVector<f64>{
- y.map(|x| -> f64 { self.sigmoid(x) })
- // TODO I like simplifying to a map function, but this feels bad, look into it
- }
- fn sigmoid(&self,y: f64) -> f64 {
- 1f64 / (1f64 + (-y).exp())
- }
- }
- }
// TODO Look into how to name tests
// TODO Look into using 'debug_assert's instead
#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::Read;
    use crate::core::NeuralNetwork;

    // Upper bound on the mean cost accepted by the training tests.
    // NOTE(review): the old comment claimed "1% inaccuracy" but the literal
    // (`01f64`) is 1.0 — confirm whether 0.01 was intended before tightening.
    const TESTING_MIN_COST: f64 = 1f64;

    #[test]
    fn new() {
        NeuralNetwork::new(&[2, 3, 1]);
    }
    #[test]
    #[should_panic(expected = "Requires >1 layers")]
    fn new_few_layers() {
        NeuralNetwork::new(&[2]);
    }
    #[test]
    #[should_panic(expected = "All layer sizes must be >0")]
    fn new_small_layers_0() {
        NeuralNetwork::new(&[0, 3, 1]);
    }
    #[test]
    #[should_panic(expected = "All layer sizes must be >0")]
    fn new_small_layers_1() {
        NeuralNetwork::new(&[2, 0, 1]);
    }
    #[test]
    #[should_panic(expected = "All layer sizes must be >0")]
    fn new_small_layers_2() {
        NeuralNetwork::new(&[2, 3, 0]);
    }
    #[test]
    fn run_0() {
        let mut neural_network = NeuralNetwork::new(&[1, 1]);
        assert_eq!(neural_network.run(&[1f64]).len(), 1usize);
    }
    #[test]
    fn run_1() {
        let mut neural_network = NeuralNetwork::new(&[2, 3]);
        assert_eq!(neural_network.run(&[1f64, 0f64]).len(), 3usize);
    }
    #[test]
    fn run_2() {
        let mut neural_network = NeuralNetwork::new(&[2, 3, 1]);
        assert_eq!(neural_network.run(&[1f64, 0f64]).len(), 1usize);
    }
    #[test]
    #[should_panic(expected = "Wrong number of inputs: 1 given, 2 required")]
    fn run_inputs_wrong_0() {
        let mut neural_network = NeuralNetwork::new(&[2, 3, 1]);
        neural_network.run(&[1f64]);
    }
    #[test]
    #[should_panic(expected = "Wrong number of inputs: 3 given, 2 required")]
    fn run_inputs_wrong_1() {
        let mut neural_network = NeuralNetwork::new(&[2, 3, 1]);
        neural_network.run(&[1f64, 1f64, 0f64]);
    }
    // Tests network to learn an XOR gate.
    #[test]
    fn train_0() {
        let mut neural_network = NeuralNetwork::new(&[2, 3, 4, 2]);
        let mut examples = [
            (vec![0f64, 0f64], vec![0f64, 1f64]),
            (vec![1f64, 0f64], vec![1f64, 0f64]),
            (vec![0f64, 1f64], vec![1f64, 0f64]),
            (vec![1f64, 1f64], vec![0f64, 1f64]),
        ];
        let test_data = examples.clone();
        neural_network.train(&mut examples, 4000, 400, 4usize, 2f64, &test_data);
        let evaluation = neural_network.evaluate(&examples);
        assert!(evaluation.0 < TESTING_MIN_COST);
        assert_eq!(evaluation.1, examples.len() as u32);
    }
    // Tests network to recognize handwritten digits of 28x28 pixels.
    // Requires the MNIST idx files under 'data/MNIST/'.
    #[test]
    fn train_1() {
        let mut neural_network = NeuralNetwork::new(&[784, 30, 10]);
        let mut training_examples = get_examples(false);
        let testing_examples = get_examples(true);
        neural_network.train(&mut training_examples, 30, 1, 10usize, 3f64, &testing_examples);
        let evaluation = neural_network.evaluate(&testing_examples);
        // NOTE(review): removed a leftover `assert!(false)` that forced this
        // test to fail even after training succeeded.
        assert!(evaluation.0 < TESTING_MIN_COST);

        // Loads (image, one-hot label) example pairs from the MNIST idx files.
        fn get_examples(testing: bool) -> Vec<(Vec<f64>, Vec<f64>)> {
            let (images, labels) = if testing {
                (
                    get_images("data/MNIST/t10k-images.idx3-ubyte"),
                    get_labels("data/MNIST/t10k-labels.idx1-ubyte"),
                )
            } else {
                (
                    get_images("data/MNIST/train-images.idx3-ubyte"),
                    get_labels("data/MNIST/train-labels.idx1-ubyte"),
                )
            };
            // One-hot encodes a digit label into a 10-element output vector.
            let set_output_layer = |label: u8| -> Vec<f64> {
                let mut output = vec![0f64; 10];
                output[label as usize] = 1f64;
                output
            };
            let examples = images
                .iter()
                .zip(labels.iter())
                .map(|(image, label)| (image.clone(), set_output_layer(*label)))
                .collect();
            return examples;

            fn get_labels(path: &str) -> Vec<u8> {
                let mut file = File::open(path).expect("failed to open MNIST label file");
                let mut label_buffer = Vec::new();
                // The old code silently ignored this Result.
                file.read_to_end(&mut label_buffer).expect("failed to read MNIST label file");
                // Skips the 8-byte idx1 header (magic number + item count);
                // the old comment incorrectly said 7 elements.
                label_buffer.drain(8..).collect()
            }
            fn get_images(path: &str) -> Vec<Vec<f64>> {
                let mut file = File::open(path).expect("failed to open MNIST image file");
                let mut image_buffer_u8 = Vec::new();
                file.read_to_end(&mut image_buffer_u8).expect("failed to read MNIST image file");
                // Skips the 16-byte idx3 header (magic number + dimensions).
                image_buffer_u8 = image_buffer_u8.drain(16..).collect();
                // Converts pixels from u8 to f64 in [0, 1].
                let image_buffer_f64: Vec<f64> = image_buffer_u8
                    .iter()
                    .map(|&pixel| pixel as f64 / 255f64)
                    .collect();
                // Splits the flat buffer into one 784-element vector per image,
                // replacing the reversed split_off dance of the old code.
                image_buffer_f64.chunks(28 * 28).map(|image| image.to_vec()).collect()
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement