// SVM_OpenCV_tryout

/***************************************************************************************************
Copyright (c) 2013 EAVISE, KU Leuven, Campus De Nayer
Contact: steven.puttemans[at]kuleuven.be

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*****************************************************************************************************

Software for creating an SVM model based on training data
INPUT: positive_training_samples.txt negative_training_samples.txt (text files with one image path at the start of each line)
OUTPUT: resulting_model.xml (the trained SVM model)

Extra info
- Cookies average dimensions : w = 122px | h = 117px
- All positive and negative images should have this size for SVM model training to work properly
- If positives have different sizes, resize them using the batch_resize_segmentations utility

*****************************************************************************************************/
#include "stdafx.h"

// OpenCV include all functionality
#include "opencv2/opencv.hpp"

// Extra includes for file processing and console output
#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Open correct namespaces
using namespace std;
using namespace cv;

// HOGDescriptor visual_image analyzing
// Adapted from http://www.juergenwiki.de/work/wiki/doku.php?id=public%3ahog_descriptor_computation_and_visualization
// ONLY PRECAUSIONS ARE
// --> Image size width/heigth needs to be a multiple of block width/heigth
// --> Block size width/heigth (multiple cells) needs to be a multiple of cell size (histogram region) width/heigth
// --> Block stride needs to be a multiple of a cell size, however current code only allows to use a block stride = cell size!
// --> ScaleFactor enlarges the image patch to make it visible (e.g. a patch of 50x50 could have a factor 10 to be visible at scale 500x500 for inspection)
// --> viz_factor enlarges the maximum size of the maximal gradient length for normalization. At viz_factor = 1 it results in a length = half the cell width
Mat get_hogdescriptor_visual_image(Mat& origImg, vector<float>& descriptorValues, Size winSize, Size cellSize, int scaleFactor, double viz_factor)
{
    Mat visual_image;
    resize(origImg, visual_image, Size(origImg.cols*scaleFactor, origImg.rows*scaleFactor));

    int gradientBinSize = 9;
    float radRangeForOneBin = 3.14/(float)gradientBinSize; // dividing 180� into 9 bins, how large (in rad) is one bin?

    // prepare data structure: 9 orientation / gradient strenghts for each cell
    int cells_in_x_dir = winSize.width / cellSize.width;
    int cells_in_y_dir = winSize.height / cellSize.height;
    int totalnrofcells = cells_in_x_dir * cells_in_y_dir;
    float*** gradientStrengths = new float**[cells_in_y_dir];
    int** cellUpdateCounter   = new int*[cells_in_y_dir];
    for (int y=0; y<cells_in_y_dir; y++)
    {
        gradientStrengths[y] = new float*[cells_in_x_dir];
        cellUpdateCounter[y] = new int[cells_in_x_dir];
        for (int x=0; x<cells_in_x_dir; x++)
        {
            gradientStrengths[y][x] = new float[gradientBinSize];
            cellUpdateCounter[y][x] = 0;

            for (int bin=0; bin<gradientBinSize; bin++)
                gradientStrengths[y][x][bin] = 0.0;
        }
    }

    // nr of blocks = nr of cells - 1
    // since there is a new block on each cell (overlapping blocks!) but the last one
    int blocks_in_x_dir = cells_in_x_dir - 1;
    int blocks_in_y_dir = cells_in_y_dir - 1;

    // compute gradient strengths per cell
    int descriptorDataIdx = 0;
    int cellx = 0;
    int celly = 0;

    for (int blockx=0; blockx<blocks_in_x_dir; blockx++)
    {
        for (int blocky=0; blocky<blocks_in_y_dir; blocky++)
        {
            // 4 cells per block ...
            for (int cellNr=0; cellNr<4; cellNr++)
            {
                // compute corresponding cell nr
                int cellx = blockx;
                int celly = blocky;
                if (cellNr==1) celly++;
                if (cellNr==2) cellx++;
                if (cellNr==3)
                {
                    cellx++;
                    celly++;
                }

                for (int bin=0; bin<gradientBinSize; bin++)
                {
                    float gradientStrength = descriptorValues[ descriptorDataIdx ];
                    descriptorDataIdx++;

                    gradientStrengths[celly][cellx][bin] += gradientStrength;

                } // for (all bins)
                // note: overlapping blocks lead to multiple updates of this sum!
                // we therefore keep track how often a cell was updated,
                // to compute average gradient strengths
                cellUpdateCounter[celly][cellx]++;
            } // for (all cells)
        } // for (all block x pos)
    } // for (all block y pos)


    // compute average gradient strengths
    for (int celly=0; celly<cells_in_y_dir; celly++)
    {
        for (int cellx=0; cellx<cells_in_x_dir; cellx++)
        {
            float NrUpdatesForThisCell = (float)cellUpdateCounter[celly][cellx];
            // compute average gradient strenghts for each gradient bin direction
            for (int bin=0; bin<gradientBinSize; bin++)
            {
                gradientStrengths[celly][cellx][bin] /= NrUpdatesForThisCell;
            }
        }
    }

    // draw cells
    for (int celly=0; celly<cells_in_y_dir; celly++)
    {
        for (int cellx=0; cellx<cells_in_x_dir; cellx++)
        {
            int drawX = cellx * cellSize.width;
            int drawY = celly * cellSize.height;

            int mx = drawX + cellSize.width/2;
            int my = drawY + cellSize.height/2;

            rectangle(visual_image, Point(drawX*scaleFactor,drawY*scaleFactor), Point((drawX+cellSize.width)*scaleFactor,(drawY+cellSize.height)*scaleFactor), CV_RGB(100,100,100), 1);

            // draw in each cell all 9 gradient strengths
            for (int bin=0; bin<gradientBinSize; bin++)
            {
                float currentGradStrength = gradientStrengths[celly][cellx][bin];

                // no line to draw?
                if (currentGradStrength==0)
                    continue;

                float currRad = bin * radRangeForOneBin + radRangeForOneBin/2;

                float dirVecX = cos( currRad );
                float dirVecY = sin( currRad );
                float maxVecLen = cellSize.width/2;
                float scale = viz_factor; // just a visual_imagealization scale, to see the lines better

                // compute line coordinates
                float x1 = mx - dirVecX * currentGradStrength * maxVecLen * scale;
                float y1 = my - dirVecY * currentGradStrength * maxVecLen * scale;
                float x2 = mx + dirVecX * currentGradStrength * maxVecLen * scale;
                float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;

                // draw gradient visual_imagealization
                line(visual_image, Point(x1*scaleFactor,y1*scaleFactor), Point(x2*scaleFactor,y2*scaleFactor), CV_RGB(0,0,255), 1);
            } // for (all bins)
        } // for (cellx)
    } // for (celly)

    // don't forget to free memory allocated by helper data structures!
    for (int y=0; y<cells_in_y_dir; y++)
    {
      for (int x=0; x<cells_in_x_dir; x++)
      {
           delete[] gradientStrengths[y][x];
      }
      delete[] gradientStrengths[y];
      delete[] cellUpdateCounter[y];
    }
    delete[] gradientStrengths;
    delete[] cellUpdateCounter;

    return visual_image;
}

int _tmain(int argc, _TCHAR* argv[])
{
    // Check if arguments are given correct
    if( argc == 1 || argc != 3){
        printf( "Usage of SVM training software: \n"
                "svm_train.exe <positive_training_samples.txt> <negative_training_samples.txt> <resulting_model.xml>\n");
        return 0;
    }

    // ****************************************************************************************************************************************
    // PREPROCESSING
    // ****************************************************************************************************************************************

    // Retrieve data from input arguments
    string positive_file = argv[1];
    string negative_file = argv[2];
    string model_file = argv[3];

    // Create the HOG descriptor initialisation - configuration
    HOGDescriptor hog;
    Size window_size = Size(48,96); hog.winSize = window_size;
    Size cell_size = Size(8,8); hog.cellSize = cell_size; hog.blockStride = cell_size;
    Size block_size = Size(16,16); hog.blockSize = block_size;
    int scale_factor = 2, viz_factor = 3;

    // ****************************************************************************************************************************************
    // POSITIVE DATA - DESCRIPTORS TO RIGHT FORMAT
    // ****************************************************************************************************************************************

    // Retrieve a list of positive file names
    ifstream input (positive_file);
    string current_line;
    vector<string> filenames_positive;
    while ( getline(input, current_line) ){
        vector<string> line_elements;
        stringstream temp (current_line);
        string first_element;
        getline(temp, first_element, ' ');
        filenames_positive.push_back(first_element);
    }
    int number_pos_samples = filenames_positive.size();
    input.close();

    // For each positive file, compute the descriptor, visualise it and store the descriptor
    vector< vector<float> > all_positive_descriptors;
    for(int i = 0; i < filenames_positive.size(); i++){
        // Read and compute descriptors
        Mat original = imread(filenames_positive[i]);

        vector<float> single_image_descriptor;
        hog.compute(original, single_image_descriptor);

        // Visualise it
        Mat image_with_descriptors = get_hogdescriptor_visual_image(original, single_image_descriptor, window_size, cell_size, scale_factor, viz_factor);
        imshow("Visualize descriptors", image_with_descriptors);
        int key = waitKey(15);
        if( key == 27 ){
            cout << "Processing aborted by pressing the ESC key!" << endl;
            break;
        }

        // Store it
        all_positive_descriptors.push_back(single_image_descriptor);
    }

    // Convert the vector of vectors into the correct format for the positive samples
    Mat all_positive_descriptors_matrix (all_positive_descriptors.size(), all_positive_descriptors[0].size(), CV_32FC1);
    for (size_t i = 0; i < all_positive_descriptors.size(); i++) {
        for (size_t j = 0; j < all_positive_descriptors[i].size(); j++) {
            all_positive_descriptors_matrix.at<float>(i, j) = all_positive_descriptors[i][j];
        }
    }

    // Output descriptor size for debug purposes
    cout << "Descriptor size using preset parameters on this image = " << all_positive_descriptors[0].size() << endl;

    // ****************************************************************************************************************************************
    // NEGATIVE DATA - DESCRIPTORS TO RIGHT FORMAT
    // ****************************************************************************************************************************************

    // Retrieve a list of negative file names
    input.open(negative_file);
    vector<string> filenames_negative;
    while ( getline(input, current_line) ){
        vector<string> line_elements;
        stringstream temp (current_line);
        string first_element;
        getline(temp, first_element, ' ');
        filenames_negative.push_back(first_element);
    }
    int number_neg_samples = filenames_negative.size();
    input.close();

    // For each negative file, compute the descriptor, visualise it and store the descriptor
    vector< vector<float> > all_negative_descriptors;
    for(int i = 0; i < filenames_negative.size(); i++){
        // Read and compute descriptors
        Mat original = imread(filenames_negative[i]);

        vector<float> single_image_descriptor;
        hog.compute(original, single_image_descriptor);

        // Visualise it
        Mat image_with_descriptors = get_hogdescriptor_visual_image(original, single_image_descriptor, window_size, cell_size, scale_factor, viz_factor);
        imshow("Visualize descriptors", image_with_descriptors);
        int key = waitKey(15);
        if( key == 27 ){
            cout << "Processing aborted by pressing the ESC key!" << endl;
            break;
        }

        // Store it
        all_negative_descriptors.push_back(single_image_descriptor);
    }

    // Convert the vector of vectors into the correct format for the negative samples
    Mat all_negative_descriptors_matrix (all_negative_descriptors.size(), all_negative_descriptors[0].size(), CV_32FC1);
    for (size_t i = 0; i < all_negative_descriptors.size(); i++) {
        for (size_t j = 0; j < all_negative_descriptors[i].size(); j++) {
            all_negative_descriptors_matrix.at<float>(i, j) = all_negative_descriptors[i][j];
        }
    }

    // ****************************************************************************************************************************************
    // COMBINE BOTH SETS AND PROVIDE CORRECT LABELS + WEIGHTS WHEN PREFERRED
    // Only adapt weights if you know what the influence of this parameter is!
    // ****************************************************************************************************************************************

    Mat inputs, labels;
    Mat labels_pos = Mat::ones(number_pos_samples, 1, CV_32FC1);
    Mat labels_neg = Mat::ones(number_neg_samples, 1, CV_32FC1) * -1.0;

    vconcat(all_positive_descriptors_matrix, all_negative_descriptors_matrix, inputs);
    vconcat(labels_pos, labels_neg, labels);

    cv::Mat1f weights(1,2); weights(0,0) = 1; weights(0,1) = 1;

    // ****************************************************************************************************************************************
    // TRAIN A SVM WITH THIS DATA
    // ****************************************************************************************************************************************

    // Configuring the SVM for training purposes
    CvSVMParams params;
    params.svm_type = CvSVM::C_SVC;
    params.kernel_type = CvSVM::LINEAR;
    params.gamma = 20;
    params.degree = 0;
    params.coef0 = 0;
    params.C = 1000; //Take a large punishment for misclassification
    params.nu = 0.0;
    params.p = 0.0;
    params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 1000, 1e-6);
    CvMat old_weights = weights; params.class_weights = &old_weights;

    // Train the SVM
    CvSVM SVM_model;
    SVM_model.train(inputs, labels, Mat(), Mat(), params);

    cout << "Training done!" << endl;
    cout << "Saving the SVM model!" << endl;

    stringstream store_location;
    store_location << model_file;

    SVM_model.save(store_location.str().c_str());

    return 0;
}