// Untitled

#include <bits/stdc++.h>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv;
using namespace cv::dnn;
using namespace std;

/* Tells whether a cropped ear image is left oriented. */
/* The input is expected to be a 96x96 image. */
/* Returns true when the first output of the side network is greater than the second. */
/* The network is read from "model-side.pb" on the first call; the program */
/* exits with status 1 if the loaded network is empty. */
bool isLeft(Mat img) {
    static Net sideNet;
    static bool needsLoading = true;

    /* one-time loading of the classifier */
    if(needsLoading) {
        needsLoading = false;
        sideNet = readNetFromTensorflow("model-side.pb");
        if(sideNet.empty()) {
            cerr << "ERROR: Could not load the CNN for side classification" << endl;
            exit(1);
        }
    }

    /* the blob values are divided by 255 before being fed to the network */
    Mat blob = blobFromImage(img);
    blob /= 255.0;
    sideNet.setInput(blob, "Placeholder");
    const Mat scores = sideNet.forward("side_/out/MatMul");

    const float firstScore = scores.at<float>(0,0);
    const float secondScore = scores.at<float>(0,1);

    /* both raw outputs are echoed on stdout */
    cout << firstScore << " " << secondScore << endl;

    return firstScore > secondScore;
}

/* detects landmarks in a left oriented cropped ear image */
/* input is a 96x96 image */
typedef enum __stage {FIRST, SECOND} stage;
void detectLandmarks(Mat img, vector<Point2d> &ldmk, stage s) {
    static bool first = true;
    static Net net1, net2;

    if(first) {
        first = false;
        net1 = readNetFromTensorflow("model-stage1.pb");
        net2 = readNetFromTensorflow("model-stage2.pb");
        if(net1.empty() || net2.empty()) {
            cerr << "ERROR: Could not load the CNNs for landmark detection" << endl;
            exit(1);
        }
    }

    Mat result, inputBlob = blobFromImage(img);
    inputBlob /= 255.0;
    if(s == FIRST) {
        net1.setInput(inputBlob);
        result = net1.forward("ear_ang45_3_sca20_r_tra20_r_e/out/MatMul");
    }
    else {
        net2.setInput(inputBlob);
        result = net2.forward("ear_ang45_3_sca20_r_tra20_r_e/out/MatMul");
    }
    result *= 48;
    result += 48;

    ldmk.clear();
    for(int i=0; i < 55; i++) {
        ldmk.push_back(Point2d(result.at<float>(0,i*2), result.at<float>(0,i*2+1)));
        cout << ldmk[i].x << "," << ldmk[i].y << " ";
    }
    cout << endl;
}

/* extracts descriptor from a left oriented normalized ear image */
/* input is a 128x128 image */
/* returns a deep copy of the output of the "MatMul" layer */
/* the network is read from "model-descriptor.pb" on the first call; the */
/* program exits with status 1 if the loaded network is empty */
Mat extractDescriptor(Mat img) {
    static bool first = true;
    static Net net;

    /* one-time loading of the descriptor network */
    if(first) {
        first = false;
        net = readNetFromTensorflow("model-descriptor.pb");
        if(net.empty()) {
            /* message now names the network that actually failed to load */
            cerr << "ERROR: Could not load the CNN for descriptor extraction" << endl;
            exit(1);
        }
    }

    /* the blob values are divided by 255 before being fed to the network */
    Mat inputBlob = blobFromImage(img);
    inputBlob /= 255.0;
    net.setInput(inputBlob);
    Mat result = net.forward("MatMul");

    /* print the size of every dimension of the descriptor on stdout */
    for(int i=0; i < result.dims; i++)
        cout << result.size[i] << " ";
    cout << endl;

    /* clone so the caller gets its own copy of the data */
    return result.clone();
}

/* interpolate ear image */
void normalizeImage(Mat image, Mat &output, int size, double scale, double ang, double cx, double cy) {
    output.create(size, size, CV_8UC1);
    double ratio = (scale/((size-1)/2.0));
    for(int i=0; i < size; i++)
        for(int j=0; j < size; j++) {
            double xt = ratio*(j-(size-1)/2.0), yt = ratio*(i-(size-1)/2.0);
            double x = xt*cos(ang)-yt*sin(ang)+cx, y = xt*sin(ang)+yt*cos(ang)+cy;
            int u = x, v = y;
            double ul = x-u, vl = y-v;
            int u1 = u+1, v1 = v+1;
            u = max(0, min(image.cols-1, u));
            u1 = max(0, min(image.cols-1, u1));
            v = max(0, min(image.rows-1, v));
            v1 = max(0, min(image.rows-1, v1));

            double tmp = image.at<uchar>(v,u)*(1.0-ul)*(1.0-vl) + image.at<uchar>(v,u1)*ul*(1.0-vl) + image.at<uchar>(v1,u)*(1.0-ul)*vl + image.at<uchar>(v1,u1)*ul*vl;
            output.at<uchar>(i,j) = max(0.0,min(255.0,tmp));
        }
}

/* adjust pose parameters using landmarks */
void adjustParameters(vector<Point2d> ldmk, int size, double &scale, double &ang, double &cx, double &cy) {
    double ratio = (scale/((size-1)/2.0));

    /* align landmark coordinate space to the original image */
    /* compute principal components and bounding box for aligned landmarks */
    Mat data_pts = Mat(55, 2, CV_64FC1);
    Point2d tl = {DBL_MAX, DBL_MAX}, br = {0.0, 0.0};
    for(int i=0; i < 55; i++) {
        double xt = ratio*(ldmk[i].x-(size-1)/2.0), yt = ratio*(ldmk[i].y-(size-1)/2.0);
        data_pts.at<double>(i, 0) = xt*cos(ang)-yt*sin(ang)+cx;
        data_pts.at<double>(i, 1) = xt*sin(ang)+yt*cos(ang)+cy;
        tl.x = min(tl.x, data_pts.at<double>(i, 0));
        tl.y = min(tl.y, data_pts.at<double>(i, 1));
        br.x = max(br.x, data_pts.at<double>(i, 0));
        br.y = max(br.y, data_pts.at<double>(i, 1));
    }
    PCA pca_analysis(data_pts, Mat(), CV_PCA_DATA_AS_ROW);

    /* set orientation of the ear as the direction of the first principal component */
    double angle = atan2(pca_analysis.eigenvectors.at<double>(0, 1), pca_analysis.eigenvectors.at<double>(0, 0));
    while(angle < 0.0)
        angle += 2.0*M_PI;
    if(angle > M_PI)
        angle -= M_PI;
    ang = angle-M_PI/2.0;

    /* set scale as two times the deviation of the first principal component */
    scale = 2.0*sqrt(pca_analysis.eigenvalues.at<double>(0, 0));

    /* set center as the center of the bounding box */
    cx = (tl.x+br.x)/2.0;
    cy = (tl.y+br.y)/2.0;
}

/* magic function */
int main(int argc, char **argv) {
    const int PREPROC_SIZE = 96;                                    // image size for side classification and landmark detection
    const int DESCRIPT_SIZE = 128;                                  // image size for cnn description

    /* load cropped ear image */
    Mat image = imread(argv[1], IMREAD_GRAYSCALE), interpolated;

    double scale = (max(image.rows, image.cols)-1.0)/2.0;           // set largest image axis as initial scale
    double ang = 0.0;                                               // orientation unknown
    double cx = (image.cols-1.0)/2.0, cy = (image.rows-1.0)/2.0;    // use center of the image as initial ear center

    /* normalize image using initial guesses for ear location, size and orientation */
    normalizeImage(image, interpolated, PREPROC_SIZE, scale, ang, cx, cy);

    while(waitKey(10) < 0)
        imshow("img", interpolated);

    /* check if the ear is left-oriented and flip it if it is not */
    if(!isLeft(interpolated)) {
        Mat tmp;
        flip(interpolated, tmp, 1);
        tmp.copyTo(interpolated);
        flip(image, tmp, 1);
        tmp.copyTo(image);
    }

    while(waitKey(10) < 0)
        imshow("img", interpolated);

    vector<Point2d> landmarks;                                      // vector with 2d coordinates of ear landmarks

    /* detect landmarks using stage 1 (robust to intense variations) */
    detectLandmarks(interpolated, landmarks, FIRST);

    for(int i=0; i < landmarks.size(); i++)
        circle(interpolated, Point(landmarks[i].x, landmarks[i].y), 2, Scalar(255), -1);

    while(waitKey(10) < 0)
        imshow("img", interpolated);

    /* normalize image using adjusted parameters */
    adjustParameters(landmarks, PREPROC_SIZE, scale, ang, cx, cy);
    normalizeImage(image, interpolated, DESCRIPT_SIZE, scale, ang, cx, cy);

    while(waitKey(10) < 0)
        imshow("img", interpolated);

    /* extract discriminant descriptor */
    Mat descriptor = extractDescriptor(interpolated);
}