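// A fully connected feed-forward neural network (784-300-10, tanh
// activations) trained on the MNIST handwritten-digit set with plain
// backpropagation plus momentum. The loaders below read the raw IDX files
// available from http://yann.lecun.com/exdb/mnist/.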
#include <vector>
#include <iostream>
#include <cstdlib>
#include <cassert>
#include <cmath>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <string>   // std::string is used throughout
#include <random>   // std::mt19937 / std::random_device for shuffling

using namespace std;
// Swap the byte order of a 32-bit integer. The MNIST IDX files store their
// header fields big-endian, so they must be reversed on little-endian hosts.
int ReverseInt(int i)
{
    unsigned char ch1, ch2, ch3, ch4;
    ch1 = i & 255;
    ch2 = (i >> 8) & 255;
    ch3 = (i >> 16) & 255;
    ch4 = (i >> 24) & 255;
    return ((int)ch1 << 24) + ((int)ch2 << 16) + ((int)ch3 << 8) + ch4;
}
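// Worked example: the image file's magic number is 0x00000803 (2051).
// Read raw into an int on a little-endian machine, its bytes land as
// 0x03080000; ReverseInt(0x03080000) returns 0x00000803.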
void ReadMNIST(string fileName, int NumberOfImages, int DataOfAnImage, vector<vector<double> > &arr)
{
    arr.resize(NumberOfImages, vector<double>(DataOfAnImage));
    ifstream file(fileName.c_str(), ios::binary);
    if (file.is_open())
    {
        int magic_number = 0;
        int number_of_images = 0;
        int n_rows = 0;
        int n_cols = 0;
        file.read((char*)&magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*)&number_of_images, sizeof(number_of_images));
        number_of_images = ReverseInt(number_of_images);
        file.read((char*)&n_rows, sizeof(n_rows));
        n_rows = ReverseInt(n_rows);
        file.read((char*)&n_cols, sizeof(n_cols));
        n_cols = ReverseInt(n_cols);
        // Never read more images than the caller allocated room for.
        number_of_images = min(number_of_images, NumberOfImages);
        for (int i = 0; i < number_of_images; ++i)
        {
            for (int r = 0; r < n_rows; ++r)
            {
                for (int c = 0; c < n_cols; ++c)
                {
                    unsigned char temp = 0;
                    file.read((char*)&temp, sizeof(temp));
                    // Row-major index: r * n_cols + c. (The paste used
                    // n_rows here, which only worked because MNIST images
                    // are square, 28x28.)
                    arr[i][(n_cols * r) + c] = (double)temp;
                }
            }
        }
    }
}
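// IDX image file layout (all header fields are big-endian 32-bit ints):
//   [magic 0x00000803][image count][row count][column count]
//   then count * rows * cols unsigned bytes, one per pixel (0..255).
// The label file uses magic 0x00000801, an item count, and one byte per label.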
void ReadMNIST_labels(string fileName, int NumberOfLabels, vector<double> &arr)
{
    arr.resize(NumberOfLabels);
    ifstream file(fileName.c_str(), ios::binary);
    if (file.is_open())
    {
        int magic_number = 0;
        int number_of_items = 0;
        file.read((char*)&magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*)&number_of_items, sizeof(number_of_items));
        number_of_items = ReverseInt(number_of_items);
        // Guard against writing past the end of arr.
        number_of_items = min(number_of_items, NumberOfLabels);
        for (int i = 0; i < number_of_items; ++i)
        {
            unsigned char temp = 0;
            file.read((char*)&temp, sizeof(temp));
            arr[i] = (double)temp;
        }
    }
}
class MNISTData
{
public:
    MNISTData(string ImagesFileName, string LabelsFileName, int nm);
    // Returns the number of input values read from the file:
    unsigned getNextInputs(vector<double> &inputVals);
    int getTargetOutputs(vector<double> &targetOutputVals);
    void setBack();

private:
    // Each row holds the 784 pixel values with the label appended at the end.
    vector<vector<double> > data;
    int count;
};

void MNISTData::setBack()
{
    count = 0;
    // Reshuffle the samples for the next epoch. std::random_shuffle is
    // deprecated in C++14 and removed in C++17, so use std::shuffle.
    static mt19937 rng(random_device{}());
    shuffle(data.begin(), data.end(), rng);
}
MNISTData::MNISTData(string ImagesFileName, string LabelsFileName, int nm)
{
    vector<double> labels;
    ReadMNIST(ImagesFileName, nm, 784, data);
    ReadMNIST_labels(LabelsFileName, nm, labels);
    // Append each label to its image so that shuffling keeps them paired.
    for (unsigned int i = 0; i < data.size(); ++i) {
        data[i].push_back(labels[i]);
    }
    count = 0;
}
unsigned MNISTData::getNextInputs(vector<double> &inputVals)
{
    // Copy the current sample and drop the trailing label.
    inputVals = data[count];
    inputVals.pop_back();
    return inputVals.size();
}

int MNISTData::getTargetOutputs(vector<double> &targetOutputVals)
{
    // Encode the label as a one-hot vector in tanh's output range:
    // 1.0 for the correct digit, -1.0 for the other nine.
    targetOutputVals.clear();
    for (unsigned int i = 0; i < 10; ++i) {
        if (i == data[count].back()) {
            targetOutputVals.push_back(1.0);
        } else {
            targetOutputVals.push_back(-1.0);
        }
    }
    // Only getTargetOutputs advances the cursor; getNextInputs does not,
    // so the two calls must be made in pairs.
    count++;
    return data[count - 1].back();
}
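// Typical usage, as in main() below (file names must match the unpacked
// MNIST files on disk):
//   MNISTData d("train-images.idx3-ubyte", "train-labels.idx1-ubyte", 60000);
//   vector<double> in, target;
//   d.getNextInputs(in);                     // 784 pixel values
//   int label = d.getTargetOutputs(target);  // one-hot target; advances cursor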
struct Connection
{
    double weight;
    double deltaWeight;
};

class Neuron;
typedef vector<Neuron> Layer;
// ****************** class Neuron ******************
class Neuron
{
public:
    Neuron(unsigned numOutputs, unsigned myIndex);
    void setOutputVal(double val) { m_outputVal = val; }
    double getOutputVal(void) const { return m_outputVal; }
    void feedForward(const Layer &prevLayer);
    void calcOutputGradients(double targetVal);
    void calcHiddenGradients(const Layer &nextLayer);
    void updateInputWeights(Layer &prevLayer);
    static void setEta(double new_eta) { eta = new_eta; }
    static void setAlpha(double new_alpha) { alpha = new_alpha; }
    vector<Connection> getWeights();
    void setWeights(vector<Connection> &v) { m_outputWeights = v; }

private:
    static double eta;   // [0.0..1.0] overall net training rate
    static double alpha; // [0.0..n] multiplier of last weight change (momentum)
    static double transferFunction(double x);
    static double transferFunctionDerivative(double x);
    static double randomWeight(void) { return rand() / double(RAND_MAX); }
    double sumDOW(const Layer &nextLayer) const;

    double m_outputVal;
    vector<Connection> m_outputWeights;
    unsigned m_myIndex;
    double m_gradient;
};
double Neuron::eta = 0.003;  // overall net learning rate, [0.0..1.0]
double Neuron::alpha = 0.6;  // momentum, multiplier of last deltaWeight, [0.0..1.0]

vector<Connection> Neuron::getWeights()
{
    return m_outputWeights;
}
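// These defaults are only starting values: main() drops eta to 0.002 and
// alpha to 0.5 after epoch 3, and eta to 0.0001 after epoch 6, via the
// static setters declared above.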
void Neuron::updateInputWeights(Layer &prevLayer)
{
    // The weights to be updated live in the Connection containers
    // of the neurons in the preceding layer.
    for (unsigned n = 0; n < prevLayer.size(); ++n) {
        Neuron &neuron = prevLayer[n];
        double oldDeltaWeight = neuron.m_outputWeights[m_myIndex].deltaWeight;
        double newDeltaWeight =
            // Individual input, magnified by the gradient and train rate:
            eta
            * neuron.getOutputVal()
            * m_gradient
            // Also add momentum: a fraction of the previous delta weight.
            + alpha
            * oldDeltaWeight;
        neuron.m_outputWeights[m_myIndex].deltaWeight = newDeltaWeight;
        neuron.m_outputWeights[m_myIndex].weight += newDeltaWeight;
    }
}
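// The rule implemented above, written out:
//   deltaWeight_new = eta * output_prev * gradient + alpha * deltaWeight_old
//   weight += deltaWeight_new
// eta scales the raw gradient step; alpha carries a fraction of the previous
// step forward (classic momentum).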
double Neuron::sumDOW(const Layer &nextLayer) const
{
    double sum = 0.0;
    // Sum our contributions to the errors of the nodes we feed.
    // size() - 1 skips the next layer's bias neuron, which we do not feed.
    for (unsigned n = 0; n < nextLayer.size() - 1; ++n) {
        sum += m_outputWeights[n].weight * nextLayer[n].m_gradient;
    }
    return sum;
}

void Neuron::calcHiddenGradients(const Layer &nextLayer)
{
    double dow = sumDOW(nextLayer);
    m_gradient = dow * Neuron::transferFunctionDerivative(m_outputVal);
}
void Neuron::calcOutputGradients(double targetVal)
{
    double delta = targetVal - m_outputVal;
    m_gradient = delta * Neuron::transferFunctionDerivative(m_outputVal);
}
double Neuron::transferFunction(double x)
{
    // tanh - output range [-1.0..1.0]
    return tanh(x);
}

double Neuron::transferFunctionDerivative(double x)
{
    // The argument is the neuron's stored *output* value, i.e. already
    // tanh of the input sum, so the derivative is simply 1 - x^2.
    // Applying tanh again here would compute the wrong quantity.
    return 1.0 - x * x;
}
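// Derivation: with output o = tanh(s), d(tanh)/ds = 1 - tanh(s)^2 = 1 - o^2.
// Callers pass the stored output value o rather than the raw sum s, which is
// why the derivative is computed as 1 - x * x.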
void Neuron::feedForward(const Layer &prevLayer)
{
    double sum = 0.0;
    // Sum the previous layer's outputs (which are our inputs),
    // including the bias node from the previous layer.
    for (unsigned n = 0; n < prevLayer.size(); ++n) {
        sum += prevLayer[n].getOutputVal() *
               prevLayer[n].m_outputWeights[m_myIndex].weight;
    }
    m_outputVal = Neuron::transferFunction(sum);
}

Neuron::Neuron(unsigned numOutputs, unsigned myIndex)
{
    for (unsigned c = 0; c < numOutputs; ++c) {
        m_outputWeights.push_back(Connection());
        m_outputWeights.back().weight = randomWeight();
        // Initialize deltaWeight, otherwise the first momentum term in
        // updateInputWeights reads an indeterminate value.
        m_outputWeights.back().deltaWeight = 0.0;
    }
    m_myIndex = myIndex;
}
// ****************** class Net ******************
class Net
{
public:
    Net(const vector<unsigned> &topology);
    void feedForward(const vector<double> &inputVals);
    void backProp(const vector<double> &targetVals);
    void getResults(vector<double> &resultVals) const;
    double getRecentAverageError(void) const { return m_recentAverageError; }
    void saveWeights(string file_name);
    void reloadWeights(string file_name);

private:
    vector<Layer> m_layers; // m_layers[layerNum][neuronNum]
    double m_error;
    double m_recentAverageError;
    static double m_recentAverageSmoothingFactor;
};
double Net::m_recentAverageSmoothingFactor = 100.0; // Number of training samples to average over

void Net::saveWeights(string file_name)
{
    ofstream NN_save;
    NN_save.open(file_name.c_str());
    // First line: the size of every layer (bias neurons included).
    for (unsigned l = 0; l < m_layers.size(); ++l) {
        NN_save << m_layers[l].size() << ";";
    }
    NN_save << endl;
    // Then one line of ';'-separated outgoing weights per neuron,
    // for every layer except the output layer (which has none).
    for (unsigned l = 0; l < m_layers.size() - 1; ++l) {
        for (unsigned n = 0; n < m_layers[l].size(); ++n) {
            for (unsigned w = 0; w < m_layers[l][n].getWeights().size(); ++w) {
                NN_save << m_layers[l][n].getWeights()[w].weight << ";";
            }
            NN_save << endl;
        }
    }
    NN_save.close();
}
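// For the 784-300-10 net built in main(), the saved file therefore begins
// (layer sizes include each layer's bias neuron):
//   785;301;11;
//   <300 ';'-separated outgoing weights of input neuron 0>
//   ... one such line per input and hidden neuron.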
void Net::reloadWeights(string file_name)
{
    // Note: this reader is hard-wired to a net with exactly one hidden
    // layer (three layers total), matching the topology built in main().
    ifstream NN_reload;
    stringstream ss;
    NN_reload.open(file_name.c_str());
    string line;
    getline(NN_reload, line);
    unsigned int input_l;
    unsigned int hidden_l;
    unsigned int output_l;
    char trash; // swallows the ';' separators
    ss << line;
    ss >> input_l;
    ss >> trash;
    ss >> hidden_l;
    ss >> trash;
    ss >> output_l;
    assert(input_l == m_layers[0].size());
    assert(hidden_l == m_layers[1].size());
    assert(output_l == m_layers[2].size());
    cout << input_l << " " << hidden_l << " " << output_l << endl;
    double weight;
    // Input layer: every neuron (bias included) feeds each of the
    // hidden_l - 1 non-bias hidden neurons.
    for (unsigned int n = 0; n < input_l; ++n) {
        vector<Connection> temp_vector;
        for (unsigned int w = 0; w < hidden_l - 1; ++w) {
            NN_reload >> weight;
            NN_reload >> trash;
            Connection ct;
            ct.weight = weight;
            ct.deltaWeight = 0.0;
            temp_vector.push_back(ct);
        }
        m_layers[0][n].setWeights(temp_vector);
    }
    // Hidden layer: same pattern toward the output layer.
    for (unsigned int n = 0; n < hidden_l; ++n) {
        vector<Connection> temp_vector;
        for (unsigned int w = 0; w < output_l - 1; ++w) {
            NN_reload >> weight;
            NN_reload >> trash;
            Connection ct;
            ct.weight = weight;
            ct.deltaWeight = 0.0;
            temp_vector.push_back(ct);
        }
        m_layers[1][n].setWeights(temp_vector);
    }
    NN_reload.close();
}
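// Round-trip sketch: after training, myNet.saveWeights("net.csv") followed
// later by myNet.reloadWeights("net.csv") on a Net built with the same
// topology restores the learned weights ("net.csv" is a placeholder name).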
void Net::getResults(vector<double> &resultVals) const
{
    resultVals.clear();
    for (unsigned n = 0; n < m_layers.back().size() - 1; ++n) {
        resultVals.push_back(m_layers.back()[n].getOutputVal());
    }
}
void Net::backProp(const vector<double> &targetVals)
{
    // Calculate overall net error (RMS of output neuron errors)
    Layer &outputLayer = m_layers.back();
    m_error = 0.0;
    for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
        double delta = targetVals[n] - outputLayer[n].getOutputVal();
        m_error += delta * delta;
    }
    m_error /= outputLayer.size() - 1; // get average error squared
    m_error = sqrt(m_error);           // RMS

    // Implement a recent average measurement
    m_recentAverageError =
        (m_recentAverageError * m_recentAverageSmoothingFactor + m_error)
        / (m_recentAverageSmoothingFactor + 1.0);

    // Calculate output layer gradients
    for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
        outputLayer[n].calcOutputGradients(targetVals[n]);
    }

    // Calculate hidden layer gradients
    for (unsigned layerNum = m_layers.size() - 2; layerNum > 0; --layerNum) {
        Layer &hiddenLayer = m_layers[layerNum];
        Layer &nextLayer = m_layers[layerNum + 1];
        for (unsigned n = 0; n < hiddenLayer.size(); ++n) {
            hiddenLayer[n].calcHiddenGradients(nextLayer);
        }
    }

    // For all layers from outputs to first hidden layer,
    // update connection weights
    for (unsigned layerNum = m_layers.size() - 1; layerNum > 0; --layerNum) {
        Layer &layer = m_layers[layerNum];
        Layer &prevLayer = m_layers[layerNum - 1];
        for (unsigned n = 0; n < layer.size() - 1; ++n) {
            layer[n].updateInputWeights(prevLayer);
        }
    }
}
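// backProp therefore performs one full stochastic-gradient step per sample:
// measure the RMS error, compute output-layer gradients, propagate gradients
// back through the hidden layers, then apply the momentum-based weight update.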
void Net::feedForward(const vector<double> &inputVals)
{
    assert(inputVals.size() == m_layers[0].size() - 1);

    // Assign (latch) the input values into the input neurons
    for (unsigned i = 0; i < inputVals.size(); ++i) {
        m_layers[0][i].setOutputVal(inputVals[i]);
    }

    // Forward propagate
    for (unsigned layerNum = 1; layerNum < m_layers.size(); ++layerNum) {
        Layer &prevLayer = m_layers[layerNum - 1];
        for (unsigned n = 0; n < m_layers[layerNum].size() - 1; ++n) {
            m_layers[layerNum][n].feedForward(prevLayer);
        }
    }
}
Net::Net(const vector<unsigned> &topology)
{
    m_error = 0.0;
    m_recentAverageError = 0.0; // otherwise backProp reads it uninitialized
    unsigned numLayers = topology.size();
    for (unsigned layerNum = 0; layerNum < numLayers; ++layerNum) {
        m_layers.push_back(Layer());
        unsigned numOutputs = layerNum == topology.size() - 1 ? 0 : topology[layerNum + 1];

        // We have a new layer, now fill it with neurons, and
        // add a bias neuron in each layer.
        for (unsigned neuronNum = 0; neuronNum <= topology[layerNum]; ++neuronNum) {
            m_layers.back().push_back(Neuron(numOutputs, neuronNum));
        }

        // Force the bias node's output to 1.0 (it was the last neuron pushed in this layer):
        m_layers.back().back().setOutputVal(1.0);
    }
}
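// Note the <= in the neuron loop above: every layer gets one extra bias
// neuron whose output is pinned to 1.0 and never overwritten, since
// feedForward and getResults only ever touch indices below size() - 1.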
void showVectorVals(string label, vector<double> &v)
{
    cout << label << " ";
    for (unsigned i = 0; i < v.size(); ++i) {
        cout << v[i] << " ";
    }
    cout << endl;
}
// Index of the largest element, i.e. the digit the net considers most likely.
int getMaxPos(vector<double> &v)
{
    double max = v[0];
    int max_pos = 0;
    for (unsigned int i = 1; i < v.size(); ++i) {
        if (max < v[i]) {
            max = v[i];
            max_pos = i;
        }
    }
    return max_pos;
}
int main()
{
    vector<unsigned> topology;
    topology.push_back(784); // input layer, number of neurons
    topology.push_back(300); // hidden layer
    topology.push_back(10);  // output layer
    Net myNet(topology);

    vector<double> inputVals, targetVals, resultVals;
    //myNet.reloadWeights("twelveEpoch.csv");

    MNISTData trainData("train-images.idx3-ubyte", "train-labels.idx1-ubyte", 60000);
    unsigned int numberOfEpochs = 10;
    for (unsigned int i = 1; i <= numberOfEpochs; ++i) {
        for (unsigned int trainingPass = 0; trainingPass < 60000; ++trainingPass) {
            cout << endl << "Epoch " << i << " Pass " << trainingPass << endl;

            // Get new input data and feed it forward:
            if (trainData.getNextInputs(inputVals) != topology[0]) {
                break;
            }
            myNet.feedForward(inputVals);

            // Collect the net's actual output results:
            myNet.getResults(resultVals);
            showVectorVals("Outputs:", resultVals);

            // Train the net on what the outputs should have been:
            cout << "Valid result: " << trainData.getTargetOutputs(targetVals) << endl;
            myNet.backProp(targetVals);
        }
        trainData.setBack();
        // Simple learning-rate schedule: decay eta (and once alpha)
        // as training progresses.
        if (i == 3) {
            Neuron::setEta(0.002);
            Neuron::setAlpha(0.5);
        }
        if (i == 6) {
            Neuron::setEta(0.0001);
        }
    }
    myNet.saveWeights("threeEpoch_003_6_threeEpoch_002_5_fourEpoch_0001_5.csv");

    // Evaluate on the 10k-image test set.
    MNISTData testData("t10k-images.idx3-ubyte", "t10k-labels.idx1-ubyte", 10000);
    int correct = 0;
    int incorrect = 0;
    int NNAnswer;
    int validAnswer;
    for (unsigned int testPass = 0; testPass < 10000; ++testPass) {
        if (testData.getNextInputs(inputVals) != topology[0]) {
            break;
        }
        myNet.feedForward(inputVals);
        myNet.getResults(resultVals);
        NNAnswer = getMaxPos(resultVals);
        validAnswer = testData.getTargetOutputs(targetVals);
        cout << "Net Answer: " << NNAnswer << ", valid answer: " << validAnswer << endl << endl;
        if (NNAnswer == validAnswer) {
            correct++;
        } else {
            incorrect++;
        }
        cout << "Correct: " << correct << " incorrect: " << incorrect << endl;
    }
    cout << endl << (incorrect / (double)(incorrect + correct)) * 100 << "% test error rate." << endl << "Done" << endl;
}
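// Build and run (a sketch: assumes g++, the four uncompressed MNIST files in
// the working directory, and that this source is saved as mnist_net.cpp):
//   g++ -std=c++11 -O2 mnist_net.cpp -o mnist_net && ./mnist_net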