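// A fully connected feed-forward neural network (784-300-10, tanh
// activations) trained on the MNIST handwritten-digit set with plain
// backpropagation plus momentum. The loaders below read the raw IDX files
// available from http://yann.lecun.com/exdb/mnist/.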
#include <vector>
#include <iostream>
#include <cstdlib>
#include <cassert>
#include <cmath>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <string>   // std::string is used throughout
#include <random>   // std::mt19937 / std::random_device for shuffling

using namespace std;
// Swap the byte order of a 32-bit integer. The MNIST IDX files store their
// header fields big-endian, so they must be reversed on little-endian hosts.
int ReverseInt(int i)
{
    unsigned char ch1, ch2, ch3, ch4;
    ch1 = i & 255;
    ch2 = (i >> 8) & 255;
    ch3 = (i >> 16) & 255;
    ch4 = (i >> 24) & 255;
    return ((int)ch1 << 24) + ((int)ch2 << 16) + ((int)ch3 << 8) + ch4;
}
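// Worked example: the image file's magic number is 0x00000803 (2051).
// Read raw into an int on a little-endian machine, its bytes land as
// 0x03080000; ReverseInt(0x03080000) returns 0x00000803.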
void ReadMNIST(string fileName, int NumberOfImages, int DataOfAnImage, vector<vector<double> > &arr)
{
    arr.resize(NumberOfImages, vector<double>(DataOfAnImage));
    ifstream file(fileName.c_str(), ios::binary);
    if (file.is_open())
    {
        int magic_number = 0;
        int number_of_images = 0;
        int n_rows = 0;
        int n_cols = 0;
        file.read((char*)&magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*)&number_of_images, sizeof(number_of_images));
        number_of_images = ReverseInt(number_of_images);
        file.read((char*)&n_rows, sizeof(n_rows));
        n_rows = ReverseInt(n_rows);
        file.read((char*)&n_cols, sizeof(n_cols));
        n_cols = ReverseInt(n_cols);
        // Never read more images than the caller allocated room for.
        number_of_images = min(number_of_images, NumberOfImages);
        for (int i = 0; i < number_of_images; ++i)
        {
            for (int r = 0; r < n_rows; ++r)
            {
                for (int c = 0; c < n_cols; ++c)
                {
                    unsigned char temp = 0;
                    file.read((char*)&temp, sizeof(temp));
                    // Row-major index: r * n_cols + c. (The paste used
                    // n_rows here, which only worked because MNIST images
                    // are square, 28x28.)
                    arr[i][(n_cols * r) + c] = (double)temp;
                }
            }
        }
    }
}
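// IDX image file layout (all header fields are big-endian 32-bit ints):
//   [magic 0x00000803][image count][row count][column count]
//   then count * rows * cols unsigned bytes, one per pixel (0..255).
// The label file uses magic 0x00000801, an item count, and one byte per label.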
void ReadMNIST_labels(string fileName, int NumberOfLabels, vector<double> &arr)
{
    arr.resize(NumberOfLabels);
    ifstream file(fileName.c_str(), ios::binary);
    if (file.is_open())
    {
        int magic_number = 0;
        int number_of_items = 0;
        file.read((char*)&magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*)&number_of_items, sizeof(number_of_items));
        number_of_items = ReverseInt(number_of_items);
        // Guard against writing past the end of arr.
        number_of_items = min(number_of_items, NumberOfLabels);
        for (int i = 0; i < number_of_items; ++i)
        {
            unsigned char temp = 0;
            file.read((char*)&temp, sizeof(temp));
            arr[i] = (double)temp;
        }
    }
}
class MNISTData
{
public:
    MNISTData(string ImagesFileName, string LabelsFileName, int nm);
    // Returns the number of input values read from the file:
    unsigned getNextInputs(vector<double> &inputVals);
    int getTargetOutputs(vector<double> &targetOutputVals);
    void setBack();

private:
    // Each row holds the 784 pixel values with the label appended at the end.
    vector<vector<double> > data;
    int count;
};

void MNISTData::setBack()
{
    count = 0;
    // Reshuffle the samples for the next epoch. std::random_shuffle is
    // deprecated in C++14 and removed in C++17, so use std::shuffle.
    static mt19937 rng(random_device{}());
    shuffle(data.begin(), data.end(), rng);
}
MNISTData::MNISTData(string ImagesFileName, string LabelsFileName, int nm)
{
    vector<double> labels;
    ReadMNIST(ImagesFileName, nm, 784, data);
    ReadMNIST_labels(LabelsFileName, nm, labels);
    // Append each label to its image so that shuffling keeps them paired.
    for (unsigned int i = 0; i < data.size(); ++i) {
        data[i].push_back(labels[i]);
    }
    count = 0;
}
unsigned MNISTData::getNextInputs(vector<double> &inputVals)
{
    // Copy the current sample and drop the trailing label.
    inputVals = data[count];
    inputVals.pop_back();
    return inputVals.size();
}

int MNISTData::getTargetOutputs(vector<double> &targetOutputVals)
{
    // Encode the label as a one-hot vector in tanh's output range:
    // 1.0 for the correct digit, -1.0 for the other nine.
    targetOutputVals.clear();
    for (unsigned int i = 0; i < 10; ++i) {
        if (i == data[count].back()) {
            targetOutputVals.push_back(1.0);
        } else {
            targetOutputVals.push_back(-1.0);
        }
    }
    // Only getTargetOutputs advances the cursor; getNextInputs does not,
    // so the two calls must be made in pairs.
    count++;
    return data[count - 1].back();
}
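// Typical usage, as in main() below (file names must match the unpacked
// MNIST files on disk):
//   MNISTData d("train-images.idx3-ubyte", "train-labels.idx1-ubyte", 60000);
//   vector<double> in, target;
//   d.getNextInputs(in);                     // 784 pixel values
//   int label = d.getTargetOutputs(target);  // one-hot target; advances cursor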
struct Connection
{
    double weight;
    double deltaWeight;
};

class Neuron;
typedef vector<Neuron> Layer;
// ****************** class Neuron ******************
class Neuron
{
public:
    Neuron(unsigned numOutputs, unsigned myIndex);
    void setOutputVal(double val) { m_outputVal = val; }
    double getOutputVal(void) const { return m_outputVal; }
    void feedForward(const Layer &prevLayer);
    void calcOutputGradients(double targetVal);
    void calcHiddenGradients(const Layer &nextLayer);
    void updateInputWeights(Layer &prevLayer);
    static void setEta(double new_eta) { eta = new_eta; }
    static void setAlpha(double new_alpha) { alpha = new_alpha; }
    vector<Connection> getWeights();
    void setWeights(vector<Connection> &v) { m_outputWeights = v; }

private:
    static double eta;   // [0.0..1.0] overall net training rate
    static double alpha; // [0.0..n] multiplier of last weight change (momentum)
    static double transferFunction(double x);
    static double transferFunctionDerivative(double x);
    static double randomWeight(void) { return rand() / double(RAND_MAX); }
    double sumDOW(const Layer &nextLayer) const;

    double m_outputVal;
    vector<Connection> m_outputWeights;
    unsigned m_myIndex;
    double m_gradient;
};
double Neuron::eta = 0.003;  // overall net learning rate, [0.0..1.0]
double Neuron::alpha = 0.6;  // momentum, multiplier of last deltaWeight, [0.0..1.0]

vector<Connection> Neuron::getWeights()
{
    return m_outputWeights;
}
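// These defaults are only starting values: main() drops eta to 0.002 and
// alpha to 0.5 after epoch 3, and eta to 0.0001 after epoch 6, via the
// static setters declared above.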
void Neuron::updateInputWeights(Layer &prevLayer)
{
    // The weights to be updated live in the Connection containers
    // of the neurons in the preceding layer.
    for (unsigned n = 0; n < prevLayer.size(); ++n) {
        Neuron &neuron = prevLayer[n];
        double oldDeltaWeight = neuron.m_outputWeights[m_myIndex].deltaWeight;
        double newDeltaWeight =
            // Individual input, magnified by the gradient and train rate:
            eta
            * neuron.getOutputVal()
            * m_gradient
            // Also add momentum: a fraction of the previous delta weight.
            + alpha
            * oldDeltaWeight;
        neuron.m_outputWeights[m_myIndex].deltaWeight = newDeltaWeight;
        neuron.m_outputWeights[m_myIndex].weight += newDeltaWeight;
    }
}
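// The rule implemented above, written out:
//   deltaWeight_new = eta * output_prev * gradient + alpha * deltaWeight_old
//   weight += deltaWeight_new
// eta scales the raw gradient step; alpha carries a fraction of the previous
// step forward (classic momentum).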
double Neuron::sumDOW(const Layer &nextLayer) const
{
    double sum = 0.0;
    // Sum our contributions to the errors of the nodes we feed.
    // size() - 1 skips the next layer's bias neuron, which we do not feed.
    for (unsigned n = 0; n < nextLayer.size() - 1; ++n) {
        sum += m_outputWeights[n].weight * nextLayer[n].m_gradient;
    }
    return sum;
}

void Neuron::calcHiddenGradients(const Layer &nextLayer)
{
    double dow = sumDOW(nextLayer);
    m_gradient = dow * Neuron::transferFunctionDerivative(m_outputVal);
}
void Neuron::calcOutputGradients(double targetVal)
{
    double delta = targetVal - m_outputVal;
    m_gradient = delta * Neuron::transferFunctionDerivative(m_outputVal);
}
double Neuron::transferFunction(double x)
{
    // tanh - output range [-1.0..1.0]
    return tanh(x);
}

double Neuron::transferFunctionDerivative(double x)
{
    // The argument is the neuron's stored *output* value, i.e. already
    // tanh of the input sum, so the derivative is simply 1 - x^2.
    // Applying tanh again here would compute the wrong quantity.
    return 1.0 - x * x;
}
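// Derivation: with output o = tanh(s), d(tanh)/ds = 1 - tanh(s)^2 = 1 - o^2.
// Callers pass the stored output value o rather than the raw sum s, which is
// why the derivative is computed as 1 - x * x.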
void Neuron::feedForward(const Layer &prevLayer)
{
    double sum = 0.0;
    // Sum the previous layer's outputs (which are our inputs),
    // including the bias node from the previous layer.
    for (unsigned n = 0; n < prevLayer.size(); ++n) {
        sum += prevLayer[n].getOutputVal() *
               prevLayer[n].m_outputWeights[m_myIndex].weight;
    }
    m_outputVal = Neuron::transferFunction(sum);
}

Neuron::Neuron(unsigned numOutputs, unsigned myIndex)
{
    for (unsigned c = 0; c < numOutputs; ++c) {
        m_outputWeights.push_back(Connection());
        m_outputWeights.back().weight = randomWeight();
        // Initialize deltaWeight, otherwise the first momentum term in
        // updateInputWeights reads an indeterminate value.
        m_outputWeights.back().deltaWeight = 0.0;
    }
    m_myIndex = myIndex;
}
// ****************** class Net ******************
class Net
{
public:
    Net(const vector<unsigned> &topology);
    void feedForward(const vector<double> &inputVals);
    void backProp(const vector<double> &targetVals);
    void getResults(vector<double> &resultVals) const;
    double getRecentAverageError(void) const { return m_recentAverageError; }
    void saveWeights(string file_name);
    void reloadWeights(string file_name);

private:
    vector<Layer> m_layers; // m_layers[layerNum][neuronNum]
    double m_error;
    double m_recentAverageError;
    static double m_recentAverageSmoothingFactor;
};
double Net::m_recentAverageSmoothingFactor = 100.0; // Number of training samples to average over

void Net::saveWeights(string file_name)
{
    ofstream NN_save;
    NN_save.open(file_name.c_str());
    // First line: the size of every layer (bias neurons included).
    for (unsigned l = 0; l < m_layers.size(); ++l) {
        NN_save << m_layers[l].size() << ";";
    }
    NN_save << endl;
    // Then one line of ';'-separated outgoing weights per neuron,
    // for every layer except the output layer (which has none).
    for (unsigned l = 0; l < m_layers.size() - 1; ++l) {
        for (unsigned n = 0; n < m_layers[l].size(); ++n) {
            for (unsigned w = 0; w < m_layers[l][n].getWeights().size(); ++w) {
                NN_save << m_layers[l][n].getWeights()[w].weight << ";";
            }
            NN_save << endl;
        }
    }
    NN_save.close();
}
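// For the 784-300-10 net built in main(), the saved file therefore begins
// (layer sizes include each layer's bias neuron):
//   785;301;11;
//   <300 ';'-separated outgoing weights of input neuron 0>
//   ... one such line per input and hidden neuron.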
void Net::reloadWeights(string file_name)
{
    // Note: this reader is hard-wired to a net with exactly one hidden
    // layer (three layers total), matching the topology built in main().
    ifstream NN_reload;
    stringstream ss;
    NN_reload.open(file_name.c_str());
    string line;
    getline(NN_reload, line);
    unsigned int input_l;
    unsigned int hidden_l;
    unsigned int output_l;
    char trash; // swallows the ';' separators
    ss << line;
    ss >> input_l;
    ss >> trash;
    ss >> hidden_l;
    ss >> trash;
    ss >> output_l;
    assert(input_l == m_layers[0].size());
    assert(hidden_l == m_layers[1].size());
    assert(output_l == m_layers[2].size());
    cout << input_l << " " << hidden_l << " " << output_l << endl;
    double weight;
    // Input layer: every neuron (bias included) feeds each of the
    // hidden_l - 1 non-bias hidden neurons.
    for (unsigned int n = 0; n < input_l; ++n) {
        vector<Connection> temp_vector;
        for (unsigned int w = 0; w < hidden_l - 1; ++w) {
            NN_reload >> weight;
            NN_reload >> trash;
            Connection ct;
            ct.weight = weight;
            ct.deltaWeight = 0.0;
            temp_vector.push_back(ct);
        }
        m_layers[0][n].setWeights(temp_vector);
    }
    // Hidden layer: same pattern toward the output layer.
    for (unsigned int n = 0; n < hidden_l; ++n) {
        vector<Connection> temp_vector;
        for (unsigned int w = 0; w < output_l - 1; ++w) {
            NN_reload >> weight;
            NN_reload >> trash;
            Connection ct;
            ct.weight = weight;
            ct.deltaWeight = 0.0;
            temp_vector.push_back(ct);
        }
        m_layers[1][n].setWeights(temp_vector);
    }
    NN_reload.close();
}
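// Round-trip sketch: after training, myNet.saveWeights("net.csv") followed
// later by myNet.reloadWeights("net.csv") on a Net built with the same
// topology restores the learned weights ("net.csv" is a placeholder name).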
void Net::getResults(vector<double> &resultVals) const
{
    resultVals.clear();
    for (unsigned n = 0; n < m_layers.back().size() - 1; ++n) {
        resultVals.push_back(m_layers.back()[n].getOutputVal());
    }
}
void Net::backProp(const vector<double> &targetVals)
{
    // Calculate overall net error (RMS of output neuron errors)
    Layer &outputLayer = m_layers.back();
    m_error = 0.0;
    for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
        double delta = targetVals[n] - outputLayer[n].getOutputVal();
        m_error += delta * delta;
    }
    m_error /= outputLayer.size() - 1; // get average error squared
    m_error = sqrt(m_error);           // RMS

    // Implement a recent average measurement
    m_recentAverageError =
        (m_recentAverageError * m_recentAverageSmoothingFactor + m_error)
        / (m_recentAverageSmoothingFactor + 1.0);

    // Calculate output layer gradients
    for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
        outputLayer[n].calcOutputGradients(targetVals[n]);
    }

    // Calculate hidden layer gradients
    for (unsigned layerNum = m_layers.size() - 2; layerNum > 0; --layerNum) {
        Layer &hiddenLayer = m_layers[layerNum];
        Layer &nextLayer = m_layers[layerNum + 1];
        for (unsigned n = 0; n < hiddenLayer.size(); ++n) {
            hiddenLayer[n].calcHiddenGradients(nextLayer);
        }
    }

    // For all layers from outputs to first hidden layer,
    // update connection weights
    for (unsigned layerNum = m_layers.size() - 1; layerNum > 0; --layerNum) {
        Layer &layer = m_layers[layerNum];
        Layer &prevLayer = m_layers[layerNum - 1];
        for (unsigned n = 0; n < layer.size() - 1; ++n) {
            layer[n].updateInputWeights(prevLayer);
        }
    }
}
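// backProp therefore performs one full stochastic-gradient step per sample:
// measure the RMS error, compute output-layer gradients, propagate gradients
// back through the hidden layers, then apply the momentum-based weight update.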
void Net::feedForward(const vector<double> &inputVals)
{
    assert(inputVals.size() == m_layers[0].size() - 1);

    // Assign (latch) the input values into the input neurons
    for (unsigned i = 0; i < inputVals.size(); ++i) {
        m_layers[0][i].setOutputVal(inputVals[i]);
    }

    // Forward propagate
    for (unsigned layerNum = 1; layerNum < m_layers.size(); ++layerNum) {
        Layer &prevLayer = m_layers[layerNum - 1];
        for (unsigned n = 0; n < m_layers[layerNum].size() - 1; ++n) {
            m_layers[layerNum][n].feedForward(prevLayer);
        }
    }
}
Net::Net(const vector<unsigned> &topology)
{
    m_error = 0.0;
    m_recentAverageError = 0.0; // otherwise backProp reads it uninitialized
    unsigned numLayers = topology.size();
    for (unsigned layerNum = 0; layerNum < numLayers; ++layerNum) {
        m_layers.push_back(Layer());
        unsigned numOutputs = layerNum == topology.size() - 1 ? 0 : topology[layerNum + 1];

        // We have a new layer, now fill it with neurons, and
        // add a bias neuron in each layer.
        for (unsigned neuronNum = 0; neuronNum <= topology[layerNum]; ++neuronNum) {
            m_layers.back().push_back(Neuron(numOutputs, neuronNum));
        }

        // Force the bias node's output to 1.0 (it was the last neuron pushed in this layer):
        m_layers.back().back().setOutputVal(1.0);
    }
}
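// Note the <= in the neuron loop above: every layer gets one extra bias
// neuron whose output is pinned to 1.0 and never overwritten, since
// feedForward and getResults only ever touch indices below size() - 1.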
void showVectorVals(string label, vector<double> &v)
{
    cout << label << " ";
    for (unsigned i = 0; i < v.size(); ++i) {
        cout << v[i] << " ";
    }
    cout << endl;
}
// Index of the largest element, i.e. the digit the net considers most likely.
int getMaxPos(vector<double> &v)
{
    double max = v[0];
    int max_pos = 0;
    for (unsigned int i = 1; i < v.size(); ++i) {
        if (max < v[i]) {
            max = v[i];
            max_pos = i;
        }
    }
    return max_pos;
}
int main()
{
    vector<unsigned> topology;
    topology.push_back(784); // input layer, number of neurons
    topology.push_back(300); // hidden layer
    topology.push_back(10);  // output layer
    Net myNet(topology);

    vector<double> inputVals, targetVals, resultVals;
    //myNet.reloadWeights("twelveEpoch.csv");

    MNISTData trainData("train-images.idx3-ubyte", "train-labels.idx1-ubyte", 60000);
    unsigned int numberOfEpochs = 10;
    for (unsigned int i = 1; i <= numberOfEpochs; ++i) {
        for (unsigned int trainingPass = 0; trainingPass < 60000; ++trainingPass) {
            cout << endl << "Epoch " << i << " Pass " << trainingPass << endl;

            // Get new input data and feed it forward:
            if (trainData.getNextInputs(inputVals) != topology[0]) {
                break;
            }
            myNet.feedForward(inputVals);

            // Collect the net's actual output results:
            myNet.getResults(resultVals);
            showVectorVals("Outputs:", resultVals);

            // Train the net on what the outputs should have been:
            cout << "Valid result: " << trainData.getTargetOutputs(targetVals) << endl;
            myNet.backProp(targetVals);
        }
        trainData.setBack();
        // Simple learning-rate schedule: decay eta (and once alpha)
        // as training progresses.
        if (i == 3) {
            Neuron::setEta(0.002);
            Neuron::setAlpha(0.5);
        }
        if (i == 6) {
            Neuron::setEta(0.0001);
        }
    }
    myNet.saveWeights("threeEpoch_003_6_threeEpoch_002_5_fourEpoch_0001_5.csv");

    // Evaluate on the 10k-image test set.
    MNISTData testData("t10k-images.idx3-ubyte", "t10k-labels.idx1-ubyte", 10000);
    int correct = 0;
    int incorrect = 0;
    int NNAnswer;
    int validAnswer;
    for (unsigned int testPass = 0; testPass < 10000; ++testPass) {
        if (testData.getNextInputs(inputVals) != topology[0]) {
            break;
        }
        myNet.feedForward(inputVals);
        myNet.getResults(resultVals);
        NNAnswer = getMaxPos(resultVals);
        validAnswer = testData.getTargetOutputs(targetVals);
        cout << "Net Answer: " << NNAnswer << ", valid answer: " << validAnswer << endl << endl;
        if (NNAnswer == validAnswer) {
            correct++;
        } else {
            incorrect++;
        }
        cout << "Correct: " << correct << " incorrect: " << incorrect << endl;
    }
    cout << endl << (incorrect / (double)(incorrect + correct)) * 100 << "% test error rate." << endl << "Done" << endl;
}
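// Build and run (a sketch: assumes g++, the four uncompressed MNIST files in
// the working directory, and that this source is saved as mnist_net.cpp):
//   g++ -std=c++11 -O2 mnist_net.cpp -o mnist_net && ./mnist_net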