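// Feedforward neural network (784-300-10, tanh activations) trained on the
// MNIST handwritten-digit set with backpropagation plus momentum. It trains
// for ten epochs on the 60,000-image training set, saves the learned weights
// to a CSV-style file, then reports the error rate on the 10,000-image test set.
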
#include <vector>
#include <iostream>
#include <cstdlib>
#include <cassert>
#include <cmath>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <random>   // for std::mt19937 / std::shuffle

using namespace std;

// MNIST stores its header integers big-endian; swap the byte order so they
// read correctly on little-endian hosts.
int ReverseInt(int i)
{
    unsigned char ch1, ch2, ch3, ch4;
    ch1 = i & 255;
    ch2 = (i >> 8) & 255;
    ch3 = (i >> 16) & 255;
    ch4 = (i >> 24) & 255;
    return ((int)ch1 << 24) + ((int)ch2 << 16) + ((int)ch3 << 8) + ch4;
}

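// The idx3-ubyte image file begins with four big-endian 32-bit integers:
// a magic number (2051), the image count, the row count and the column count.
// The pixel data follows as one unsigned byte (0..255) per pixel.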
void ReadMNIST(string fileName, int NumberOfImages, int DataOfAnImage, vector<vector<double> > &arr)
{
    arr.resize(NumberOfImages, vector<double>(DataOfAnImage));
    ifstream file(fileName.c_str(), ios::binary);
    if (file.is_open())
    {
        int magic_number = 0;
        int number_of_images = 0;
        int n_rows = 0;
        int n_cols = 0;
        file.read((char*)&magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*)&number_of_images, sizeof(number_of_images));
        number_of_images = ReverseInt(number_of_images);
        file.read((char*)&n_rows, sizeof(n_rows));
        n_rows = ReverseInt(n_rows);
        file.read((char*)&n_cols, sizeof(n_cols));
        n_cols = ReverseInt(n_cols);
        for (int i = 0; i < number_of_images; ++i)
        {
            for (int r = 0; r < n_rows; ++r)
            {
                for (int c = 0; c < n_cols; ++c)
                {
                    unsigned char temp = 0;
                    file.read((char*)&temp, sizeof(temp));
                    // Row-major index is n_cols*r + c (the original used n_rows,
                    // which only worked because MNIST images are square).
                    arr[i][(n_cols * r) + c] = (double)temp;
                }
            }
        }
    }
}

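// The idx1-ubyte label file begins with two big-endian 32-bit integers, a
// magic number (2049) and the item count, followed by one byte (0..9) per label.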
void ReadMNIST_labels(string fileName, int NumberOfLabels, vector<double> &arr)
{
    arr.resize(NumberOfLabels);
    ifstream file(fileName.c_str(), ios::binary);
    if (file.is_open())
    {
        int magic_number = 0;
        int number_of_items = 0;
        file.read((char*)&magic_number, sizeof(magic_number));
        magic_number = ReverseInt(magic_number);
        file.read((char*)&number_of_items, sizeof(number_of_items));
        number_of_items = ReverseInt(number_of_items);
        for (int i = 0; i < number_of_items; ++i)
        {
            unsigned char temp = 0;
            file.read((char*)&temp, sizeof(temp));
            arr[i] = (double)temp;
        }
    }
}

class MNISTData
{
public:
    MNISTData(string ImagesFileName, string LabelsFileName, int nm);

    // Returns the number of input values read from the file:
    unsigned getNextInputs(vector<double> &inputVals);
    int getTargetOutputs(vector<double> &targetOutputVals);
    void setBack();

private:
    vector<vector<double> > data; // each row: 784 pixel values + the label appended
    int count;
};

void MNISTData::setBack()
{
    count = 0;

    // Reshuffle the samples for the next epoch. std::random_shuffle was
    // deprecated in C++14 (removed in C++17), so use std::shuffle instead.
    static mt19937 rng(random_device{}());
    shuffle(data.begin(), data.end(), rng);
}

MNISTData::MNISTData(string ImagesFileName, string LabelsFileName, int nm)
{
    vector<double> labels;
    ReadMNIST(ImagesFileName, nm, 784, data);
    ReadMNIST_labels(LabelsFileName, nm, labels);
    // Append each label to its image row so the pair survives shuffling.
    for (unsigned int i = 0; i < data.size(); ++i) {
        data[i].push_back(labels[i]);
    }
    count = 0;
}

unsigned MNISTData::getNextInputs(vector<double> &inputVals)
{
    inputVals = data[count];   // copy the current row...
    inputVals.pop_back();      // ...and drop the appended label
    return inputVals.size();
}

int MNISTData::getTargetOutputs(vector<double> &targetOutputVals)
{
    targetOutputVals.clear();

    int label = (int)data[count].back();
    // One target per output neuron: +1.0 for the correct digit, -1.0
    // everywhere else (matching the tanh output range).
    for (int i = 0; i < 10; ++i) {
        targetOutputVals.push_back(i == label ? 1.0 : -1.0);
    }

    ++count;
    return label;
}
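
// Example: for label 3, getTargetOutputs() fills the target vector with
// {-1, -1, -1, +1, -1, -1, -1, -1, -1, -1} and returns 3.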

struct Connection
{
    double weight;
    double deltaWeight;
};


class Neuron;

typedef vector<Neuron> Layer;

// ****************** class Neuron ******************
class Neuron
{
public:
    Neuron(unsigned numOutputs, unsigned myIndex);
    void setOutputVal(double val) { m_outputVal = val; }
    double getOutputVal(void) const { return m_outputVal; }
    void feedForward(const Layer &prevLayer);
    void calcOutputGradients(double targetVal);
    void calcHiddenGradients(const Layer &nextLayer);
    void updateInputWeights(Layer &prevLayer);
    static void setEta(double new_eta) { eta = new_eta; }
    static void setAlpha(double new_alpha) { alpha = new_alpha; }
    vector<Connection> getWeights();
    void setWeights(vector<Connection> &v) { m_outputWeights = v; }

private:
    static double eta;   // [0.0..1.0] overall net training rate
    static double alpha; // [0.0..n] multiplier of last weight change (momentum)
    static double transferFunction(double x);
    static double transferFunctionDerivative(double x);
    static double randomWeight(void) { return rand() / double(RAND_MAX); }
    double sumDOW(const Layer &nextLayer) const;
    double m_outputVal;
    vector<Connection> m_outputWeights;
    unsigned m_myIndex;
    double m_gradient;
};

double Neuron::eta = 0.003; // overall net learning rate, [0.0..1.0]
double Neuron::alpha = 0.6; // momentum, multiplier of last deltaWeight, [0.0..n]

vector<Connection> Neuron::getWeights()
{
    return m_outputWeights;
}

void Neuron::updateInputWeights(Layer &prevLayer)
{
    // The weights to be updated live in the Connection containers
    // of the neurons in the preceding layer.

    for (unsigned n = 0; n < prevLayer.size(); ++n) {
        Neuron &neuron = prevLayer[n];
        double oldDeltaWeight = neuron.m_outputWeights[m_myIndex].deltaWeight;

        double newDeltaWeight =
                // Individual input, magnified by the gradient and train rate:
                eta
                * neuron.getOutputVal()
                * m_gradient
                // Also add momentum: a fraction of the previous delta weight.
                + alpha
                * oldDeltaWeight;

        neuron.m_outputWeights[m_myIndex].deltaWeight = newDeltaWeight;
        neuron.m_outputWeights[m_myIndex].weight += newDeltaWeight;
    }
}

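// In symbols, the update applied above is
//     delta_w = eta * out_prev * gradient + alpha * delta_w_prev
// i.e. plain gradient descent with a momentum term.
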
double Neuron::sumDOW(const Layer &nextLayer) const
{
    double sum = 0.0;

    // Sum our contributions to the errors at the nodes we feed
    // (the next layer's bias neuron receives no input, so skip it).

    for (unsigned n = 0; n < nextLayer.size() - 1; ++n) {
        sum += m_outputWeights[n].weight * nextLayer[n].m_gradient;
    }

    return sum;
}

void Neuron::calcHiddenGradients(const Layer &nextLayer)
{
    double dow = sumDOW(nextLayer);
    m_gradient = dow * Neuron::transferFunctionDerivative(m_outputVal);
}

void Neuron::calcOutputGradients(double targetVal)
{
    double delta = targetVal - m_outputVal;
    m_gradient = delta * Neuron::transferFunctionDerivative(m_outputVal);
}

double Neuron::transferFunction(double x)
{
    // tanh - output range [-1.0..1.0]
    return tanh(x);
}

double Neuron::transferFunctionDerivative(double x)
{
    // Exact derivative: d/dx tanh(x) = 1 - tanh^2(x)
    return 1.0 - pow(tanh(x), 2.0);
}

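// Each neuron computes out = tanh(sum_n out_n * w_n), where the sum runs over
// every neuron in the previous layer, including that layer's bias neuron.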
void Neuron::feedForward(const Layer &prevLayer)
{
    double sum = 0.0;

    // Sum the previous layer's outputs (which are our inputs),
    // including the bias node from the previous layer.

    for (unsigned n = 0; n < prevLayer.size(); ++n) {
        sum += prevLayer[n].getOutputVal() *
               prevLayer[n].m_outputWeights[m_myIndex].weight;
    }

    m_outputVal = Neuron::transferFunction(sum);
}

Neuron::Neuron(unsigned numOutputs, unsigned myIndex)
{
    for (unsigned c = 0; c < numOutputs; ++c) {
        m_outputWeights.push_back(Connection());
        m_outputWeights.back().weight = randomWeight();
    }

    m_myIndex = myIndex;
}


// ****************** class Net ******************
class Net
{
public:
    Net(const vector<unsigned> &topology);
    void feedForward(const vector<double> &inputVals);
    void backProp(const vector<double> &targetVals);
    void getResults(vector<double> &resultVals) const;
    double getRecentAverageError(void) const { return m_recentAverageError; }
    void saveWeights(string file_name);
    void reloadWeights(string file_name);

private:
    vector<Layer> m_layers; // m_layers[layerNum][neuronNum]
    double m_error;
    double m_recentAverageError;
    static double m_recentAverageSmoothingFactor;
};


double Net::m_recentAverageSmoothingFactor = 100.0; // Number of training samples to average over

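// Weight-file layout: the first line holds the layer sizes (bias neurons
// included) separated by ';'; then one line per neuron in every layer but the
// last, holding that neuron's outgoing weights separated by ';'.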
void Net::saveWeights(string file_name)
{
    ofstream NN_save;
    NN_save.open(file_name.c_str());

    for (unsigned l = 0; l < m_layers.size(); ++l) {
        NN_save << m_layers[l].size() << ";";
    }
    NN_save << endl;
    for (unsigned l = 0; l < m_layers.size() - 1; ++l) {
        for (unsigned n = 0; n < m_layers[l].size(); ++n) {
            // Fetch the weight vector once rather than copying it per element.
            vector<Connection> weights = m_layers[l][n].getWeights();
            for (unsigned w = 0; w < weights.size(); ++w) {
                NN_save << weights[w].weight << ";";
            }
            NN_save << endl;
        }
    }
    NN_save.close();
}

// Note: this loader is hard-coded for a three-layer (input-hidden-output)
// topology. The sizes on the first line include the bias neurons, so each
// neuron stores (size of next layer - 1) outgoing weights.
void Net::reloadWeights(string file_name)
{
    ifstream NN_reload;
    stringstream ss;
    NN_reload.open(file_name.c_str());
    string line;
    getline(NN_reload, line);
    unsigned int input_l;
    unsigned int hidden_l;
    unsigned int output_l;
    char trash; // swallows the ';' separators
    ss << line;
    ss >> input_l;
    ss >> trash;
    ss >> hidden_l;
    ss >> trash;
    ss >> output_l;
    assert(input_l == m_layers[0].size());
    assert(hidden_l == m_layers[1].size());
    assert(output_l == m_layers[2].size());
    cout << input_l << " " << hidden_l << " " << output_l << endl;
    double weight;
    for (unsigned int n = 0; n < input_l; ++n) {
        vector<Connection> temp_vector;
        for (unsigned int w = 0; w < hidden_l - 1; ++w) {
            NN_reload >> weight;
            NN_reload >> trash;
            Connection ct;
            ct.weight = weight;
            ct.deltaWeight = 0.0;
            temp_vector.push_back(ct);
        }
        m_layers[0][n].setWeights(temp_vector);
    }

    for (unsigned int n = 0; n < hidden_l; ++n) {
        vector<Connection> temp_vector;
        for (unsigned int w = 0; w < output_l - 1; ++w) {
            NN_reload >> weight;
            NN_reload >> trash;
            Connection ct;
            ct.weight = weight;
            ct.deltaWeight = 0.0;
            temp_vector.push_back(ct);
        }
        m_layers[1][n].setWeights(temp_vector);
    }

    NN_reload.close();
}

void Net::getResults(vector<double> &resultVals) const
{
    resultVals.clear();

    // Skip the output layer's bias neuron.
    for (unsigned n = 0; n < m_layers.back().size() - 1; ++n) {
        resultVals.push_back(m_layers.back()[n].getOutputVal());
    }
}

void Net::backProp(const vector<double> &targetVals)
{
    // Calculate overall net error (RMS of output neuron errors)

    Layer &outputLayer = m_layers.back();
    m_error = 0.0;

    for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
        double delta = targetVals[n] - outputLayer[n].getOutputVal();
        m_error += delta * delta;
    }
    m_error /= outputLayer.size() - 1; // get average error squared
    m_error = sqrt(m_error); // RMS

    // Implement a recent average measurement

    m_recentAverageError =
            (m_recentAverageError * m_recentAverageSmoothingFactor + m_error)
            / (m_recentAverageSmoothingFactor + 1.0);

    // Calculate output layer gradients

    for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
        outputLayer[n].calcOutputGradients(targetVals[n]);
    }

    // Calculate hidden layer gradients

    for (unsigned layerNum = m_layers.size() - 2; layerNum > 0; --layerNum) {
        Layer &hiddenLayer = m_layers[layerNum];
        Layer &nextLayer = m_layers[layerNum + 1];

        for (unsigned n = 0; n < hiddenLayer.size(); ++n) {
            hiddenLayer[n].calcHiddenGradients(nextLayer);
        }
    }

    // For all layers from outputs to first hidden layer,
    // update connection weights

    for (unsigned layerNum = m_layers.size() - 1; layerNum > 0; --layerNum) {
        Layer &layer = m_layers[layerNum];
        Layer &prevLayer = m_layers[layerNum - 1];

        for (unsigned n = 0; n < layer.size() - 1; ++n) {
            layer[n].updateInputWeights(prevLayer);
        }
    }
}

void Net::feedForward(const vector<double> &inputVals)
{
    assert(inputVals.size() == m_layers[0].size() - 1);

    // Assign (latch) the input values into the input neurons
    for (unsigned i = 0; i < inputVals.size(); ++i) {
        m_layers[0][i].setOutputVal(inputVals[i]);
    }

    // Forward propagate, layer by layer; the bias neurons keep their
    // fixed output of 1.0.
    for (unsigned layerNum = 1; layerNum < m_layers.size(); ++layerNum) {
        Layer &prevLayer = m_layers[layerNum - 1];
        for (unsigned n = 0; n < m_layers[layerNum].size() - 1; ++n) {
            m_layers[layerNum][n].feedForward(prevLayer);
        }
    }
}

Net::Net(const vector<unsigned> &topology)
{
    unsigned numLayers = topology.size();
    for (unsigned layerNum = 0; layerNum < numLayers; ++layerNum) {
        m_layers.push_back(Layer());
        unsigned numOutputs = layerNum == topology.size() - 1 ? 0 : topology[layerNum + 1];

        // We have a new layer; now fill it with neurons, and
        // add a bias neuron to each layer (hence <=).
        for (unsigned neuronNum = 0; neuronNum <= topology[layerNum]; ++neuronNum) {
            m_layers.back().push_back(Neuron(numOutputs, neuronNum));
        }

        // Force the bias node's output to 1.0 (it was the last neuron pushed in this layer):
        m_layers.back().back().setOutputVal(1.0);
    }
}


void showVectorVals(string label, const vector<double> &v)
{
    cout << label << " ";
    for (unsigned i = 0; i < v.size(); ++i) {
        cout << v[i] << " ";
    }

    cout << endl;
}

// Index of the largest element, i.e. the digit the net considers most likely.
int getMaxPos(const vector<double> &v)
{
    double max = v[0];
    int max_pos = 0;

    for (unsigned int i = 1; i < v.size(); ++i)
    {
        if (max < v[i])
        {
            max = v[i];
            max_pos = i;
        }
    }

    return max_pos;
}


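// main() expects the four standard MNIST files (train-images.idx3-ubyte,
// train-labels.idx1-ubyte, t10k-images.idx3-ubyte, t10k-labels.idx1-ubyte),
// decompressed, in the working directory; they are the set originally
// distributed at http://yann.lecun.com/exdb/mnist/.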
int main()
{
    vector<unsigned> topology;
    topology.push_back(784); // input layer: one neuron per pixel
    topology.push_back(300); // hidden layer
    topology.push_back(10);  // output layer: one neuron per digit
    Net myNet(topology);

    vector<double> inputVals, targetVals, resultVals;

    //myNet.reloadWeights("twelveEpoch.csv");

    MNISTData trainData("train-images.idx3-ubyte", "train-labels.idx1-ubyte", 60000);

    unsigned int numberOfEpochs = 10;
    for (unsigned int i = 1; i <= numberOfEpochs; ++i) {
        for (unsigned int trainingPass = 0; trainingPass < 60000; ++trainingPass) {
            cout << endl << "Epoch " << i << " Pass " << trainingPass << endl;

            // Get new input data and feed it forward:
            if (trainData.getNextInputs(inputVals) != topology[0]) {
                break;
            }
            myNet.feedForward(inputVals);

            // Collect the net's actual output results:
            myNet.getResults(resultVals);
            showVectorVals("Outputs:", resultVals);

            // Train the net on what the outputs should have been:
            cout << "Valid result: " << trainData.getTargetOutputs(targetVals) << endl;
            myNet.backProp(targetVals);
        }
        // Reset the sample cursor and reshuffle for the next epoch.
        trainData.setBack();

        // Anneal the learning rate (and momentum) as training progresses.
        if (i == 3) {
            Neuron::setEta(0.002);
            Neuron::setAlpha(0.5);
        }
        if (i == 6) {
            Neuron::setEta(0.0001);
        }
    }

    myNet.saveWeights("threeEpoch_003_6_threeEpoch_002_5_fourEpoch_0001_5.csv");

    MNISTData testData("t10k-images.idx3-ubyte", "t10k-labels.idx1-ubyte", 10000);

    int correct = 0;
    int incorrect = 0;
    int NNAnswer;
    int validAnswer;

    for (unsigned int testPass = 0; testPass < 10000; ++testPass) {

        if (testData.getNextInputs(inputVals) != topology[0]) {
            break;
        }

        myNet.feedForward(inputVals);
        myNet.getResults(resultVals);
        NNAnswer = getMaxPos(resultVals);
        validAnswer = testData.getTargetOutputs(targetVals);
        cout << "Net Answer: " << NNAnswer << ", valid answer: " << validAnswer << endl << endl;
        if (NNAnswer == validAnswer) {
            correct++;
        }
        else {
            incorrect++;
        }
        cout << "Correct: " << correct << " incorrect: " << incorrect << endl;
    }

    cout << endl << (incorrect / (double)(incorrect + correct)) * 100 << "% test error rate." << endl << "Done" << endl;

    return 0;
}