// Untitled

#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

// Log stream written by prepareInput(), normInputEntry(), printState() and main().
ofstream logFile("last_log.txt");

// Step size multiplied into every weight update in backProp().
// -1 is a placeholder; prepareInput() overwrites it with the value from the data file.
double LEARNING_RATE = -1;

const double MOMENTUM_RATE = -1; // meant to "nudge" weights away from local minima; not referenced by any code visible in this file

// Layer sizes as stated in the data file (bias nodes NOT included).
// All stay at -1 until prepareInput() has read the file header.
int NUM_INPUT_NODES = -1;
int NUM_HIDDEN_NODES = -1;
int NUM_OUTPUT_NODES = -1; // must be one for now

// Number of training samples in the data file and number of passes made over them.
int NUM_TRAINING_DATA_ROWS = -1;
int NUM_EPOCHS = -1;

// One bias node is added to the input and hidden layers; it occupies slot 0.
const int BIAS_NODE_COUNT = 1;
const int BIAS_NODE_INDEX = 0;

// Constant activation that main() writes into input[0] before every forward pass.
const double BIAS_NODE_VALUE = -1.0f;

// these globals take bias nodes into account
// (prepareInput() sets input/hidden to NUM_* + BIAS_NODE_COUNT; the output layer gets no bias node)
int REAL_NUM_INPUT_NODES = -1;
int REAL_NUM_HIDDEN_NODES = -1;
int REAL_NUM_OUTPUT_NODES = -1;

// Divisor applied to x inside sigmoid(); 1.0 gives the standard logistic curve.
const double SIGMOID_CONSTANT = 1.0f;

// Weight matrices: w0[input][hidden] connects input -> hidden,
// w1[hidden][output] connects hidden -> output.
vector< std::vector<double> > w0;
vector< std::vector<double> > w1;

// Same shapes as w0/w1. Zeroed by initNetworkWeights() and never read in the visible code.
vector< std::vector<double> > w0_delta;
vector< std::vector<double> > w1_delta;

// datasets
// inputDataSet[row][feature] holds values already normalised by normInputEntry();
// targetDataSet[row][output] holds the targets exactly as read from the file.
vector< std::vector<double> > inputDataSet;
vector< vector<double> > targetDataSet;

// layer outputs
// Activations for the sample currently being processed.
vector<double> input;
vector<double> hidden;
vector<double> output;

// Expected output values for the sample currently being processed.
vector<double> target;

// layer errors
// hiddenError and outputError are filled by backProp(); inputError is only sized
// by prepareInput() and otherwise unused in the visible code.
vector<double> inputError;
vector<double> hiddenError;
vector<double> outputError;

// input min/max
// Per-feature bounds read from the data file (no bias slot); used by normInputEntry().
vector<double> inputMin;
vector<double> inputMax;

// prototypes
void backProp();
void printState();
double normInputEntry( int, double );


// Logistic activation function: 1 / (1 + e^(-x / SIGMOID_CONSTANT)).
// Returns a value between 0 and 1; SIGMOID_CONSTANT stretches or compresses
// the curve along the x axis.
double sigmoid(double x) {
    const double exponent = -x / SIGMOID_CONSTANT;
    const double denominator = 1 + std::exp(exponent);
    return 1.0 / denominator;
}

// Draws one starting weight from rand(), scaled so it lies in [-0.3, 0.3].
static double randomInitialWeight() {
    const double unitSample = rand() / (double)RAND_MAX;
    return (unitSample * .6) - 0.3;
}

// (Re)initialises the network: every weight gets a small random value and
// every entry of the delta matrices is cleared to zero.
//
// The column of w0 that feeds the hidden layer's bias slot is forced to 0.0
// so that the bias slot is never driven by the inputs.
//
// Requires w0, w1, w0_delta and w1_delta to be sized already (prepareInput()).
void initNetworkWeights() {

    // input -> hidden
    for ( int in = 0; in < REAL_NUM_INPUT_NODES; ++in ) {
        std::vector<double>& weights = w0[in];
        std::vector<double>& deltas = w0_delta[in];

        for ( int hid = 0; hid < REAL_NUM_HIDDEN_NODES; ++hid ) {
            weights[hid] = (hid == BIAS_NODE_INDEX) ? 0.0 : randomInitialWeight();
            deltas[hid] = 0.0;
        }
    }

    // hidden -> output (every hidden node, bias included, gets a random weight)
    for ( int hid = 0; hid < REAL_NUM_HIDDEN_NODES; ++hid ) {
        std::vector<double>& weights = w1[hid];
        std::vector<double>& deltas = w1_delta[hid];

        for ( int out = 0; out < REAL_NUM_OUTPUT_NODES; ++out ) {
            weights[out] = randomInitialWeight();
            deltas[out] = 0.0;
        }
    }
}

void calcNetwork() {

     for ( int hiddenNodeIndex = 1; hiddenNodeIndex < REAL_NUM_HIDDEN_NODES; hiddenNodeIndex++ ) {
         hidden[hiddenNodeIndex] = 0.0f;

         for ( int inputNodeIndex = 0; inputNodeIndex < REAL_NUM_INPUT_NODES; inputNodeIndex++ ) {
             hidden[hiddenNodeIndex] += input[inputNodeIndex] * w0[inputNodeIndex][hiddenNodeIndex];
         }
         hidden[hiddenNodeIndex] = sigmoid(hidden[hiddenNodeIndex]);
     }

     for ( int outputNodeIndex = 0; outputNodeIndex < REAL_NUM_OUTPUT_NODES; outputNodeIndex++ ) {
         output[outputNodeIndex] = 0;

         for ( int hiddenNodeIndex = 0; hiddenNodeIndex < REAL_NUM_HIDDEN_NODES; hiddenNodeIndex++ ) {
             output[outputNodeIndex] += hidden[hiddenNodeIndex] * w1[hiddenNodeIndex][outputNodeIndex];
         }
     }
}

// Backward pass for the sample last pushed through calcNetwork().
//
// Reads `input`, `hidden`, `output` and `target`; writes `outputError` and
// `hiddenError`; then adjusts w1 and w0 in place, scaled by LEARNING_RATE.
// Both error vectors are computed from the weights as they were BEFORE this
// call modified them.
//
// NOTE(review): the o*(1-o) factor below is the derivative of a logistic
// output, but calcNetwork() produces a plain weighted sum for the output
// layer. Confirm which of the two is intended.
void backProp() {

    // Output layer error term: o * (1 - o) * (t - o).
    for ( int out = 0; out < REAL_NUM_OUTPUT_NODES; ++out ) {
        const double actual = output[out];
        outputError[out] = actual * (1-actual) * (target[out]-actual);
    }

    // Hidden layer error term: h * (1 - h) * sum_o( w1[h][o] * outputError[o] ).
    for ( int hid = 0; hid < REAL_NUM_HIDDEN_NODES; ++hid ) {
        double weightedError = 0.0;
        for ( int out = 0; out < REAL_NUM_OUTPUT_NODES; ++out ) {
            weightedError += w1[hid][out] * outputError[out];
        }

        const double activation = hidden[hid];
        hiddenError[hid] = activation * (1.0 - activation) * weightedError;
    }

    // Adjust the hidden -> output weights.
    for ( int out = 0; out < REAL_NUM_OUTPUT_NODES; ++out ) {
        const double scaledError = LEARNING_RATE * outputError[out];
        for ( int hid = 0; hid < REAL_NUM_HIDDEN_NODES; ++hid ) {
            w1[hid][out] += scaledError * hidden[hid];
        }
    }

    // Adjust the input -> hidden weights. Column 0 (the hidden bias slot)
    // is skipped, so the weights leading into it are left untouched.
    for ( int hid = 1; hid < REAL_NUM_HIDDEN_NODES; ++hid ) {
        const double scaledError = LEARNING_RATE * hiddenError[hid];
        for ( int in = 0; in < REAL_NUM_INPUT_NODES; ++in ) {
            w0[in][hid] += scaledError * input[in];
        }
    }
}

// Writes the first `count` elements of `values` to the log on a single line,
// each followed by one space. `firstPrefix` is emitted directly in front of
// element 0 (pass "" for no prefix).
static void logStateRow(const std::vector<double>& values, int count, const char* firstPrefix) {
    for ( int i = 0; i < count; i++ ) {
        if(i == 0) logFile << firstPrefix;
        logFile << values[i] << " ";
    }
    logFile << '\n';
}

// Writes `label`, then the rows x cols top-left corner of `matrix` (one row
// per line), then a blank line.
static void logStateMatrix(const char* label, const std::vector< std::vector<double> >& matrix,
                           int rows, int cols) {
    logFile << label << '\n';
    for ( int r = 0; r < rows; r++ ) {
        logStateRow(matrix[r], cols, "");
    }
    logFile << '\n';
}

// Dumps the complete network state to logFile: both weight matrices, the
// current input / hidden / output / target vectors, the per-feature input
// bounds, and the signed sum of (target - output) over all output nodes.
//
// All global vectors must already be sized (i.e. prepareInput() has run).
void printState() {

    logFile << "#########################\n######## STATE: #########\n#########################\n\n";

    logStateMatrix("w0", w0, REAL_NUM_INPUT_NODES, REAL_NUM_HIDDEN_NODES);
    logStateMatrix("w1", w1, REAL_NUM_HIDDEN_NODES, REAL_NUM_OUTPUT_NODES);

    // Slot 0 of `input` is the bias node, hence the "(b)" marker.
    logFile << "input\n";
    logStateRow(input, REAL_NUM_INPUT_NODES, "(b)");

    // inputMin / inputMax have NUM_INPUT_NODES entries and no bias slot, so
    // their first entry must not be tagged "(b)" (it previously was, which
    // mislabelled the first real feature as the bias).
    logFile << "inputMin\n";
    logStateRow(inputMin, NUM_INPUT_NODES, "");

    logFile << "inputMax\n";
    logStateRow(inputMax, NUM_INPUT_NODES, "");

    logFile << "hidden\n";
    logStateRow(hidden, REAL_NUM_HIDDEN_NODES, "");

    logFile << "output\n";
    logStateRow(output, REAL_NUM_OUTPUT_NODES, "");

    logFile << "target\n";
    logStateRow(target, REAL_NUM_OUTPUT_NODES, "");

    logFile << "output error\n";

    // Signed sum: with more than one output node, errors of opposite sign
    // cancel each other out in this figure.
    double errorSum = 0.0;
    for ( int x = 0; x < REAL_NUM_OUTPUT_NODES; x++ ) {
        errorSum += target[x] - output[x];
    }

    logFile << errorSum << "\n\n\n";

    // One flush per dump (instead of one per line) keeps the log current
    // without paying for a flush on every row of every epoch.
    logFile.flush();
}

// Prints `message` on stderr and terminates the program with status 1, the
// same way a failed file open is reported.
static void abortOnBadInputFile(const char* message) {
    cerr << message;
    exit(1);
}

// Loads the network configuration and the training set from `fileName`, then
// sizes every global vector accordingly.
//
// Expected file layout (whitespace separated):
//   NUM_INPUT_NODES NUM_HIDDEN_NODES NUM_OUTPUT_NODES
//   NUM_TRAINING_DATA_ROWS NUM_EPOCHS
//   LEARNING_RATE
//   NUM_INPUT_NODES minimum values, then NUM_INPUT_NODES maximum values
//   per row: NUM_INPUT_NODES inputs followed by NUM_OUTPUT_NODES targets
//
// Inputs are stored normalised (see normInputEntry()); targets are stored as
// read. Progress is traced to logFile and stdout.
//
// Terminates the program with status 1 when the file cannot be opened, the
// header is unreadable or holds impossible sizes, or the data ends early.
void prepareInput(std::string fileName) {

    ifstream dataFile(fileName.c_str());

    if(!dataFile) {
        abortOnBadInputFile("File Open FAILED\n");
    }

    dataFile >> NUM_INPUT_NODES >> NUM_HIDDEN_NODES >> NUM_OUTPUT_NODES;
    dataFile >> NUM_TRAINING_DATA_ROWS >> NUM_EPOCHS;
    dataFile >> LEARNING_RATE;

    logFile << "\nNUM_INPUT_NODES: " << NUM_INPUT_NODES;
    logFile << "\nNUM_HIDDEN_NODES: " << NUM_HIDDEN_NODES;
    logFile << "\nNUM_OUTPUT_NODES: " << NUM_OUTPUT_NODES;
    logFile << "\nNUM_TRAINING_DATA_ROWS: " << NUM_TRAINING_DATA_ROWS;
    logFile << "\nNUM_EPOCHS: " << NUM_EPOCHS;
    logFile << "\nLEARNING_RATE: " << LEARNING_RATE;
    logFile << endl;

    // These counts are handed to vector::resize() below. A failed read or a
    // negative number would turn into an enormous unsigned size there, so
    // reject a bad header explicitly instead.
    if(!dataFile
       || NUM_INPUT_NODES < 1 || NUM_HIDDEN_NODES < 1 || NUM_OUTPUT_NODES < 1
       || NUM_TRAINING_DATA_ROWS < 0 || NUM_EPOCHS < 0) {
        abortOnBadInputFile("File Header INVALID\n");
    }

    REAL_NUM_INPUT_NODES = NUM_INPUT_NODES + BIAS_NODE_COUNT;
    REAL_NUM_HIDDEN_NODES = NUM_HIDDEN_NODES + BIAS_NODE_COUNT;
    REAL_NUM_OUTPUT_NODES = NUM_OUTPUT_NODES;

    // setup all of the memory we will need
    inputMin.resize(NUM_INPUT_NODES);
    inputMax.resize(NUM_INPUT_NODES);

    input.resize(REAL_NUM_INPUT_NODES);
    hidden.resize(REAL_NUM_HIDDEN_NODES);
    output.resize(REAL_NUM_OUTPUT_NODES);

    target.resize(REAL_NUM_OUTPUT_NODES);

    inputError.resize(REAL_NUM_INPUT_NODES);
    hiddenError.resize(REAL_NUM_HIDDEN_NODES);
    outputError.resize(REAL_NUM_OUTPUT_NODES);

    inputDataSet.resize(NUM_TRAINING_DATA_ROWS, vector<double>(NUM_INPUT_NODES));

    w0.resize(REAL_NUM_INPUT_NODES, vector<double>(REAL_NUM_HIDDEN_NODES));
    w1.resize(REAL_NUM_HIDDEN_NODES, vector<double>(REAL_NUM_OUTPUT_NODES));

    w0_delta.resize(REAL_NUM_INPUT_NODES, vector<double>(REAL_NUM_HIDDEN_NODES));
    w1_delta.resize(REAL_NUM_HIDDEN_NODES, vector<double>(REAL_NUM_OUTPUT_NODES));

    targetDataSet.resize(NUM_TRAINING_DATA_ROWS, vector<double>(REAL_NUM_OUTPUT_NODES));

    cout << "inputDataSet.size(): " << inputDataSet.size() << endl;

    // Per-feature bounds: all minimums first, then all maximums.
    for( int inputMinIndex = 0; inputMinIndex < NUM_INPUT_NODES; inputMinIndex++ ) {
        dataFile >> inputMin[inputMinIndex];
    }
    for( int inputMaxIndex = 0; inputMaxIndex < NUM_INPUT_NODES; inputMaxIndex++ ) {
        dataFile >> inputMax[inputMaxIndex];
    }
    if(!dataFile) {
        abortOnBadInputFile("File Read FAILED (input min/max)\n");
    }

    // fill in the input and target data
    for( int dataRowIndex = 0; dataRowIndex < NUM_TRAINING_DATA_ROWS; dataRowIndex++ ) {
        // Report the size of THIS row (the old code always printed row 0's).
        cout << "inputDataSet[" <<  dataRowIndex <<"].size(): " << inputDataSet[dataRowIndex].size() << endl;

        for( int inputNodeIndex = 0; inputNodeIndex < NUM_INPUT_NODES; inputNodeIndex++ ) {
            double& entry = inputDataSet[dataRowIndex][inputNodeIndex];

            if(!(dataFile >> entry)) {
                abortOnBadInputFile("File Read FAILED (training inputs)\n");
            }
            logFile << "i[" << dataRowIndex << "]["
                    << inputNodeIndex << "]: " << entry << " ";

            // normalize data
            entry = normInputEntry(inputNodeIndex, entry);
            logFile << "Before assert: inputDataSet[" << dataRowIndex << "]["
                    << inputNodeIndex << "] = " << entry << endl;

            // A value below the declared minimum normalises to a negative
            // number. (Debug-only check: it is compiled out under NDEBUG.)
            assert(entry >= 0.0);
        }

        for( int targetNodeIndex = 0; targetNodeIndex < REAL_NUM_OUTPUT_NODES; targetNodeIndex++ ) {
            double& entry = targetDataSet[dataRowIndex][targetNodeIndex];

            if(!(dataFile >> entry)) {
                abortOnBadInputFile("File Read FAILED (training targets)\n");
            }
            logFile << "t[" << dataRowIndex << "]["
                    << targetNodeIndex << "]: " << entry << " ";
        }
        logFile << endl;
    }

    // Single labelled state dump. The old code also emitted an unlabelled
    // duplicate immediately before the label.
    logFile << "\nAfter File Read: \n";
    printState();

    // dataFile closes itself when it goes out of scope.
}

// Min-max normalises one raw feature value:
//     (inputVal - min) / (max - min)
// using the bounds stored in inputMin / inputMax for feature `inputIndex`
// (0-based, bias slot NOT counted). A value inside [min, max] maps into
// [0, 1]; values outside that interval map outside [0, 1].
//
// If min == max for the feature, the result is defined as 0.0. The plain
// formula would divide by zero there and feed inf/NaN into the network.
//
// The conversion is traced to logFile. Returns the normalised value.
double normInputEntry(int inputIndex, double inputVal) {

   const double lowerBound = inputMin[inputIndex];
   const double upperBound = inputMax[inputIndex];
   const double span = upperBound - lowerBound;

   // Exact comparison is intentional: only a span of exactly zero makes the
   // division below undefined.
   const double result = (span != 0.0) ? (inputVal - lowerBound) / span : 0.0;

   logFile << "Converted input " << inputVal
           << " into " << result << " (min: " << lowerBound
           << ", max: " << upperBound << ", inputIndex: " << inputIndex << ")\n";

   return result;
}

// Copies training row `dataRowIndex` into the live `input` / `target`
// vectors. Slot 0 of `input` receives the bias value; the features follow.
static void loadTrainingRow(int dataRowIndex) {
    input[0] = BIAS_NODE_VALUE;
    for( int inputNodeIndex = 1; inputNodeIndex < REAL_NUM_INPUT_NODES; inputNodeIndex++ ) {
        input[inputNodeIndex] = inputDataSet[dataRowIndex][inputNodeIndex-1];
    }
    for( int targetNodeIndex = 0; targetNodeIndex < REAL_NUM_OUTPUT_NODES; targetNodeIndex++ ) {
        target[targetNodeIndex] = targetDataSet[dataRowIndex][targetNodeIndex];
    }
}

// Runs NUM_EPOCHS passes over the training set, one forward and one backward
// pass per row, logging progress and the network state after every row.
//
// Returns -1 when every epoch completed, or the index of the row after which
// outputError[0] became NaN (training is abandoned at that point).
static int trainAllEpochs() {
    // 64-bit arithmetic so that epochs * rows * 100 cannot overflow an int.
    const long long totalSteps = static_cast<long long>(NUM_EPOCHS) * NUM_TRAINING_DATA_ROWS;

    for( int epochIndex = 0; epochIndex < NUM_EPOCHS; epochIndex++ ) {
        for( int dataRowIndex = 0; dataRowIndex < NUM_TRAINING_DATA_ROWS; dataRowIndex++ ) {
            const long long stepsDone =
                static_cast<long long>(epochIndex) * NUM_TRAINING_DATA_ROWS + dataRowIndex;
            // totalSteps is > 0 whenever this loop body executes.
            const long long percentDone = 100 * stepsDone / totalSteps;

            // The log file used to omit the factor 100 and always showed 0%.
            logFile << "Epoch " << epochIndex << " row "
                << dataRowIndex << " ("
                << percentDone
                << "%)\n";
            cout << "Epoch " << epochIndex << " row "
                << dataRowIndex << " ("
                << percentDone
                << "%)\n";

            loadTrainingRow(dataRowIndex);

            calcNetwork();
            backProp();
            printState();

            // NaN is the only value that compares unequal to itself.
            if(outputError[0] != outputError[0]) {
                return dataRowIndex;
            }
        }
    }
    return -1;
}

// Prompts for one value per input node and stores the normalised values in
// `input` (bias in slot 0). Returns false when the user enters a negative
// number or when stdin is exhausted / unparsable, meaning "quit".
static bool readQueryFromUser() {
    input[0] = BIAS_NODE_VALUE;
    for( int inputNodeIndex = 1; inputNodeIndex < REAL_NUM_INPUT_NODES; inputNodeIndex++ ) {
        cout << "enter num ( < 0 to quit)" << inputNodeIndex << ":";

        double rawValue = 0.0;
        // A failed extraction leaves cin in an error state and, unchecked,
        // made the prompt loop spin forever on end-of-input.
        if(!(cin >> rawValue) || rawValue < 0) {
            return false;
        }
        input[inputNodeIndex] = normInputEntry( inputNodeIndex-1, rawValue );
    }
    return true;
}

// Program entry point:
//   1. seeds rand(), loads "iris_data.txt" and randomises the weights;
//   2. trains the network, starting over with fresh weights whenever the
//      output error turns into NaN;
//   3. answers interactive queries until the user quits.
// Returns 0 once the user quits.
int main() {

    srand(static_cast<unsigned>(time(0)));

    prepareInput("iris_data.txt");
    initNetworkWeights();

    // Restart from epoch 0 / row 0 with new weights after a NaN. (The old
    // in-loop reset resumed at row 1 and always reported row 0.)
    int failedRow = trainAllEpochs();
    while( failedRow >= 0 ) {
        cout << "\n\nQNAN FOUND during data row " << failedRow << ". Resetting weights.\n";
        initNetworkWeights();
        failedRow = trainAllEpochs();
    }

    printState();

    while( readQueryFromUser() ) {
        calcNetwork();
        printState();
        cout << "network response: ";
        for( int outputNodeIndex = 0; outputNodeIndex < REAL_NUM_OUTPUT_NODES; outputNodeIndex++ ) {
            cout << output[outputNodeIndex] << " ";
        }
        cout << endl;
    }

    logFile.close();
    return 0;
}