Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <vector>
- // Function prototypes
- void subtractPlane(const cv::Mat& depth, cv::Mat& mask, std::vector<cv::Point>& chain, double f);
- std::vector<cv::Point> maskFromTemplate(const std::vector<cv::linemod::Template>& templates,
- int num_modalities, cv::Point offset, cv::Size size,
- cv::Mat& mask, cv::Mat& dst);
- void templateConvexHull(const std::vector<cv::linemod::Template>& templates,
- int num_modalities, cv::Point offset, cv::Size size,
- cv::Mat& dst);
- void drawResponse(const std::vector<cv::linemod::Template>& templates,
- int num_modalities, cv::Mat& dst, cv::Point offset, int T);
- cv::Mat displayQuantized(const cv::Mat& quantized);
- // Copy of cv_mouse from cv_utilities
- class Mouse
- {
- public:
- static void start(const std::string& a_img_name)
- {
- cv::setMouseCallback(a_img_name, Mouse::cv_on_mouse, 0);
- }
- static int event(void)
- {
- int l_event = m_event;
- m_event = -1;
- return l_event;
- }
- static int x(void)
- {
- return m_x;
- }
- static int y(void)
- {
- return m_y;
- }
- private:
- static void cv_on_mouse(int a_event, int a_x, int a_y, int, void *)
- {
- m_event = a_event;
- m_x = a_x;
- m_y = a_y;
- }
- static int m_event;
- static int m_x;
- static int m_y;
- };
- int Mouse::m_event;
- int Mouse::m_x;
- int Mouse::m_y;
// Print usage instructions and the interactive key bindings to stdout.
static void help()
{
  printf("Usage: openni_demo [templates.yml]\n\n"
         "Place your object on a planar, featureless surface. With the mouse,\n"
         "frame it in the 'color' window and right click to learn a first template.\n"
         "Then press 'l' to enter online learning mode, and move the camera around.\n"
         "When the match score falls between 90-95%% the demo will add a new template.\n\n");
  printf("Keys:\n"
         "\t h -- This help page\n"
         "\t l -- Toggle online learning\n"
         "\t m -- Toggle printing match result\n"
         "\t t -- Toggle printing timings\n"
         "\t w -- Write learned templates to disk\n"
         "\t [ ] -- Adjust matching threshold: '[' down, ']' up\n"
         "\t q -- Quit\n\n");
}
- // Adapted from cv_timer in cv_utilities
- class Timer
- {
- public:
- Timer() : start_(0), time_(0) {}
- void start()
- {
- start_ = cv::getTickCount();
- }
- void stop()
- {
- CV_Assert(start_ != 0);
- int64 end = cv::getTickCount();
- time_ += end - start_;
- start_ = 0;
- }
- double time()
- {
- double ret = time_ / cv::getTickFrequency();
- time_ = 0;
- return ret;
- }
- private:
- int64 start_, time_;
- };
- // Functions to store detector and templates in single XML/YAML file
- static cv::Ptr<cv::linemod::Detector> readLinemod(const std::string& filename)
- {
- cv::Ptr<cv::linemod::Detector> detector = new cv::linemod::Detector;
- cv::FileStorage fs(filename, cv::FileStorage::READ);
- detector->read(fs.root());
- cv::FileNode fn = fs["classes"];
- for (cv::FileNodeIterator i = fn.begin(), iend = fn.end(); i != iend; ++i)
- detector->readClass(*i);
- return detector;
- }
- static void writeLinemod(const cv::Ptr<cv::linemod::Detector>& detector, const std::string& filename)
- {
- cv::FileStorage fs(filename, cv::FileStorage::WRITE);
- detector->write(fs);
- std::vector<std::string> ids = detector->classIds();
- fs << "classes" << "[";
- for (int i = 0; i < (int)ids.size(); ++i)
- {
- fs << "{";
- detector->writeClass(ids[i], fs);
- fs << "}"; // current class
- }
- fs << "]"; // classes
- }
- int main(int argc, char * argv[])
- {
- // Various settings and flags
- bool show_match_result = true;
- bool show_timings = false;
- bool learn_online = false;
- int num_classes = 0;
- int matching_threshold = 80;
- /// @todo Keys for changing these?
- cv::Size roi_size(200, 200);
- int learning_lower_bound = 90;
- int learning_upper_bound = 95;
- // Timers
- Timer extract_timer;
- Timer match_timer;
- // Initialize HighGUI
- help();
- cv::namedWindow("color");
- cv::namedWindow("normals");
- Mouse::start("color");
- // Initialize LINEMOD data structures
- cv::Ptr<cv::linemod::Detector> detector;
- std::string filename;
- if (argc == 1)
- {
- filename = "linemod_templates.yml";
- detector = cv::linemod::getDefaultLINEMOD();
- }
- else
- {
- detector = readLinemod(argv[1]);
- std::vector<std::string> ids = detector->classIds();
- num_classes = detector->numClasses();
- printf("Loaded %s with %d classes and %d templates\n",
- argv[1], num_classes, detector->numTemplates());
- if (!ids.empty())
- {
- printf("Class ids:\n");
- std::copy(ids.begin(), ids.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
- }
- }
- int num_modalities = (int)detector->getModalities().size();
- // Open Kinect sensor
- cv::VideoCapture capture( CV_CAP_OPENNI );
- if (!capture.isOpened())
- {
- printf("Could not open OpenNI-capable sensor\n");
- return -1;
- }
- capture.set(CV_CAP_PROP_OPENNI_REGISTRATION, 1);
- double focal_length = capture.get(CV_CAP_OPENNI_DEPTH_GENERATOR_FOCAL_LENGTH);
- //printf("Focal length = %f\n", focal_length);
- // Main loop
- cv::Mat color, depth;
- for(;;)
- {
- // Capture next color/depth pair
- capture.grab();
- capture.retrieve(depth, CV_CAP_OPENNI_DEPTH_MAP);
- capture.retrieve(color, CV_CAP_OPENNI_BGR_IMAGE);
- std::vector<cv::Mat> sources;
- sources.push_back(color);
- sources.push_back(depth);
- cv::Mat display = color.clone();
- if (!learn_online)
- {
- cv::Point mouse(Mouse::x(), Mouse::y());
- int event = Mouse::event();
- // Compute ROI centered on current mouse location
- cv::Point roi_offset(roi_size.width / 2, roi_size.height / 2);
- cv::Point pt1 = mouse - roi_offset; // top left
- cv::Point pt2 = mouse + roi_offset; // bottom right
- if (event == CV_EVENT_RBUTTONDOWN)
- {
- // Compute object mask by subtracting the plane within the ROI
- std::vector<cv::Point> chain(4);
- chain[0] = pt1;
- chain[1] = cv::Point(pt2.x, pt1.y);
- chain[2] = pt2;
- chain[3] = cv::Point(pt1.x, pt2.y);
- cv::Mat mask;
- subtractPlane(depth, mask, chain, focal_length);
- cv::imshow("mask", mask);
- // Extract template
- std::string class_id = cv::format("class%d", num_classes);
- cv::Rect bb;
- extract_timer.start();
- int template_id = detector->addTemplate(sources, class_id, mask, &bb);
- extract_timer.stop();
- if (template_id != -1)
- {
- printf("*** Added template (id %d) for new object class %d***\n",
- template_id, num_classes);
- //printf("Extracted at (%d, %d) size %dx%d\n", bb.x, bb.y, bb.width, bb.height);
- }
- ++num_classes;
- }
- // Draw ROI for display
- cv::rectangle(display, pt1, pt2, cv::Scalar(0,0,0), 3);
- cv::rectangle(display, pt1, pt2, cv::Scalar(0,255,255), 1);
- }
- // Perform matching
- std::vector<cv::linemod::Match> matches;
- std::vector<std::string> class_ids;
- std::vector<cv::Mat> quantized_images;
- match_timer.start();
- detector->match(sources, (float)matching_threshold, matches, class_ids, quantized_images);
- match_timer.stop();
- int classes_visited = 0;
- std::set<std::string> visited;
- for (int i = 0; (i < (int)matches.size()) && (classes_visited < num_classes); ++i)
- {
- cv::linemod::Match m = matches[i];
- if (visited.insert(m.class_id).second)
- {
- ++classes_visited;
- if (show_match_result)
- {
- printf("Similarity: %5.1f%%; x: %3d; y: %3d; class: %s; template: %3d\n",
- m.similarity, m.x, m.y, m.class_id.c_str(), m.template_id);
- }
- // Draw matching template
- const std::vector<cv::linemod::Template>& templates = detector->getTemplates(m.class_id, m.template_id);
- drawResponse(templates, num_modalities, display, cv::Point(m.x, m.y), detector->getT(0));
- if (learn_online == true)
- {
- /// @todo Online learning possibly broken by new gradient feature extraction,
- /// which assumes an accurate object outline.
- // Compute masks based on convex hull of matched template
- cv::Mat color_mask, depth_mask;
- std::vector<cv::Point> chain = maskFromTemplate(templates, num_modalities,
- cv::Point(m.x, m.y), color.size(),
- color_mask, display);
- subtractPlane(depth, depth_mask, chain, focal_length);
- cv::imshow("mask", depth_mask);
- // If pretty sure (but not TOO sure), add new template
- if (learning_lower_bound < m.similarity && m.similarity < learning_upper_bound)
- {
- extract_timer.start();
- int template_id = detector->addTemplate(sources, m.class_id, depth_mask);
- extract_timer.stop();
- if (template_id != -1)
- {
- printf("*** Added template (id %d) for existing object class %s***\n",
- template_id, m.class_id.c_str());
- }
- }
- }
- }
- }
- if (show_match_result && matches.empty())
- printf("No matches found...\n");
- if (show_timings)
- {
- printf("Training: %.2fs\n", extract_timer.time());
- printf("Matching: %.2fs\n", match_timer.time());
- }
- if (show_match_result || show_timings)
- printf("------------------------------------------------------------\n");
- cv::imshow("color", display);
- cv::imshow("normals", quantized_images[1]);
- cv::FileStorage fs;
- char key = (char)cv::waitKey(10);
- if( key == 'q' )
- break;
- switch (key)
- {
- case 'h':
- help();
- break;
- case 'm':
- // toggle printing match result
- show_match_result = !show_match_result;
- printf("Show match result %s\n", show_match_result ? "ON" : "OFF");
- break;
- case 't':
- // toggle printing timings
- show_timings = !show_timings;
- printf("Show timings %s\n", show_timings ? "ON" : "OFF");
- break;
- case 'l':
- // toggle online learning
- learn_online = !learn_online;
- printf("Online learning %s\n", learn_online ? "ON" : "OFF");
- break;
- case '[':
- // decrement threshold
- matching_threshold = std::max(matching_threshold - 1, -100);
- printf("New threshold: %d\n", matching_threshold);
- break;
- case ']':
- // increment threshold
- matching_threshold = std::min(matching_threshold + 1, +100);
- printf("New threshold: %d\n", matching_threshold);
- break;
- case 'w':
- // write model to disk
- writeLinemod(detector, filename);
- printf("Wrote detector and templates to %s\n", filename.c_str());
- break;
- default:
- ;
- }
- }
- return 0;
- }
- static void reprojectPoints(const std::vector<cv::Point3d>& proj, std::vector<cv::Point3d>& real, double f)
- {
- real.resize(proj.size());
- double f_inv = 1.0 / f;
- for (int i = 0; i < (int)proj.size(); ++i)
- {
- double Z = proj[i].z;
- real[i].x = (proj[i].x - 320.) * (f_inv * Z);
- real[i].y = (proj[i].y - 240.) * (f_inv * Z);
- real[i].z = Z;
- }
- }
- static void filterPlane(cv::Mat & ap_depth, std::vector<cv::Mat> & a_masks, std::vector<cv::Point> & a_chain, double f)
- {
- const int l_num_cost_pts = 200;
- float l_thres = 4;
- cv::Mat lp_mask = cv::Mat::zeros(ap_depth.size(), CV_8UC1);
- std::vector<cv::Point> l_chain_vector;
- float l_chain_length = 0;
- float * lp_seg_length = new float[a_chain.size()];
- for (int l_i = 0; l_i < (int)a_chain.size(); ++l_i)
- {
- float x_diff = (float)(a_chain[(l_i + 1) % a_chain.size()].x - a_chain[l_i].x);
- float y_diff = (float)(a_chain[(l_i + 1) % a_chain.size()].y - a_chain[l_i].y);
- lp_seg_length[l_i] = sqrt(x_diff*x_diff + y_diff*y_diff);
- l_chain_length += lp_seg_length[l_i];
- }
- for (int l_i = 0; l_i < (int)a_chain.size(); ++l_i)
- {
- if (lp_seg_length[l_i] > 0)
- {
- int l_cur_num = cvRound(l_num_cost_pts * lp_seg_length[l_i] / l_chain_length);
- float l_cur_len = lp_seg_length[l_i] / l_cur_num;
- for (int l_j = 0; l_j < l_cur_num; ++l_j)
- {
- float l_ratio = (l_cur_len * l_j / lp_seg_length[l_i]);
- cv::Point l_pts;
- l_pts.x = cvRound(l_ratio * (a_chain[(l_i + 1) % a_chain.size()].x - a_chain[l_i].x) + a_chain[l_i].x);
- l_pts.y = cvRound(l_ratio * (a_chain[(l_i + 1) % a_chain.size()].y - a_chain[l_i].y) + a_chain[l_i].y);
- l_chain_vector.push_back(l_pts);
- }
- }
- }
- std::vector<cv::Point3d> lp_src_3Dpts(l_chain_vector.size());
- for (int l_i = 0; l_i < (int)l_chain_vector.size(); ++l_i)
- {
- lp_src_3Dpts[l_i].x = l_chain_vector[l_i].x;
- lp_src_3Dpts[l_i].y = l_chain_vector[l_i].y;
- lp_src_3Dpts[l_i].z = ap_depth.at<int>(cvRound(lp_src_3Dpts[l_i].y), cvRound(lp_src_3Dpts[l_i].x));
- //lp_mask.at<uchar>((int)lp_src_3Dpts[l_i].Y,(int)lp_src_3Dpts[l_i].X) = 255;
- }
- reprojectPoints(lp_src_3Dpts, lp_src_3Dpts, f);
- cv::Mat lp_pts = cv::Mat((int)l_chain_vector.size(), 4, CV_32F);
- cv::Mat lp_v = cv::Mat(4, 4, CV_32F);
- cv::Mat lp_w = cv::Mat(4, 1, CV_32F);
- for (int l_i = 0; l_i < (int)l_chain_vector.size(); ++l_i)
- {
- lp_pts.at<float>(l_i, 0) = (float)lp_src_3Dpts[l_i].x;
- lp_pts.at<float>(l_i, 1) = (float)lp_src_3Dpts[l_i].y;
- lp_pts.at<float>(l_i, 2) = (float)lp_src_3Dpts[l_i].z;
- lp_pts.at<float>(l_i, 3) = 1.0f;
- }
- cv::SVD::compute(lp_pts, lp_w, cv::Mat(), lp_v);
- float l_n[4] = {lp_v.at<float>(0, 3),
- lp_v.at<float>(1, 3),
- lp_v.at<float>(2, 3),
- lp_v.at<float>(3, 3)};
- float l_norm = sqrt(l_n[0] * l_n[0] + l_n[1] * l_n[1] + l_n[2] * l_n[2]);
- l_n[0] /= l_norm;
- l_n[1] /= l_norm;
- l_n[2] /= l_norm;
- l_n[3] /= l_norm;
- float l_max_dist = 0;
- for (int l_i = 0; l_i < (int)l_chain_vector.size(); ++l_i)
- {
- float l_dist = l_n[0] * lp_pts.at<float>(l_i, 0) +
- l_n[1] * lp_pts.at<float>(l_i, 1) +
- l_n[2] * lp_pts.at<float>(l_i, 2) +
- l_n[3] * lp_pts.at<float>(l_i, 3);
- if (fabs(l_dist) > l_max_dist)
- l_max_dist = l_dist;
- }
- //std::cerr << "plane: " << l_n[0] << ";" << l_n[1] << ";" << l_n[2] << ";" << l_n[3] << " maxdist: " << l_max_dist << " end" << std::endl;
- int l_minx = ap_depth.cols;
- int l_miny = ap_depth.rows;
- int l_maxx = 0;
- int l_maxy = 0;
- for (int l_i = 0; l_i < (int)a_chain.size(); ++l_i)
- {
- l_minx = std::min(l_minx, a_chain[l_i].x);
- l_miny = std::min(l_miny, a_chain[l_i].y);
- l_maxx = std::max(l_maxx, a_chain[l_i].x);
- l_maxy = std::max(l_maxy, a_chain[l_i].y);
- }
- int l_w = l_maxx - l_minx + 1;
- int l_h = l_maxy - l_miny + 1;
- int l_nn = (int)a_chain.size();
- std::vector<cv::Point> lp_chain;
- for (int l_i = 0; l_i < l_nn; ++l_i)
- lp_chain.push_back(a_chain[l_i]);
- // Create the proper structure for the fillPoly function
- std::vector< std::vector<cv::Point> > filled;
- filled.push_back(lp_chain);
- cv::fillPoly(lp_mask, filled, cv::Scalar(255, 255, 255));
- //cv::imshow(lp_mask,"hallo1");
- std::vector<cv::Point3d> lp_dst_3Dpts(l_h * l_w);
- int l_ind = 0;
- for (int l_r = 0; l_r < l_h; ++l_r)
- {
- for (int l_c = 0; l_c < l_w; ++l_c)
- {
- lp_dst_3Dpts[l_ind].x = l_c + l_minx;
- lp_dst_3Dpts[l_ind].y = l_r + l_miny;
- lp_dst_3Dpts[l_ind].z = ap_depth.at<uchar>(l_r + l_miny, l_c + l_minx);
- ++l_ind;
- }
- }
- reprojectPoints(lp_dst_3Dpts, lp_dst_3Dpts, f);
- l_ind = 0;
- for (int l_r = 0; l_r < l_h; ++l_r)
- {
- for (int l_c = 0; l_c < l_w; ++l_c)
- {
- float l_dist = (float)(l_n[0] * lp_dst_3Dpts[l_ind].x + l_n[1] * lp_dst_3Dpts[l_ind].y + lp_dst_3Dpts[l_ind].z * l_n[2] + l_n[3]);
- ++l_ind;
- if (lp_mask.at<uchar>(l_r + l_miny, l_c + l_minx) != 0)
- {
- if (fabs(l_dist) < std::max(l_thres, (l_max_dist * 2.0f)))
- {
- for (int l_p = 0; l_p < (int)a_masks.size(); ++l_p)
- {
- int l_col = cvRound((l_c + l_minx) / (l_p + 1.0));
- int l_row = cvRound((l_r + l_miny) / (l_p + 1.0));
- a_masks[l_p].at<uchar>(l_row, l_col) = 0;
- }
- }
- else
- {
- for (int l_p = 0; l_p < (int)a_masks.size(); ++l_p)
- {
- int l_col = cvRound((l_c + l_minx) / (l_p + 1.0));
- int l_row = cvRound((l_r + l_miny) / (l_p + 1.0));
- a_masks[l_p].at<uchar>(l_row, l_col) = 255;
- }
- }
- }
- }
- }
- }
- void subtractPlane(const cv::Mat& depth, cv::Mat& mask, std::vector<cv::Point>& chain, double f)
- {
- mask = cv::Mat::zeros(depth.rows, depth.cols, CV_8U);
- std::vector<cv::Mat> tmp;
- tmp.push_back(mask);
- cv::Mat temp_depth = depth.clone();
- filterPlane(temp_depth, tmp, chain, f);
- }
- std::vector<cv::Point> maskFromTemplate(const std::vector<cv::linemod::Template>& templates,
- int num_modalities, cv::Point offset, cv::Size size,
- cv::Mat& mask, cv::Mat& dst)
- {
- templateConvexHull(templates, num_modalities, offset, size, mask);
- const int OFFSET = 30;
- cv::dilate(mask, mask, cv::Mat(), cv::Point(-1,-1), OFFSET);
- cv::Mat mask_copy = mask.clone();
- std::vector<cv::Mat> contours;
- cv::findContours(mask_copy, contours, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE);
- std::vector<cv::Point> l_pts1 = contours[0]; // to use as input to cv_primesensor::filter_plane
- // Create the lines from the contour set, on the dst matrix
- // Add first element to the end to make the contour complete
- l_pts1.push_back(l_pts1[0]);
- for(size_t i = 0; i < l_pts1.size() - 1; i++)
- {
- cv::Point pt0 = l_pts1[i];
- cv::Point pt1 = l_pts1[i+1];
- cv::line(dst, pt0, pt1, cv::Scalar(0,255,0), 2);
- }
- return l_pts1;
- }
- // Adapted from cv_show_angles
- cv::Mat displayQuantized(const cv::Mat& quantized)
- {
- cv::Mat color(quantized.size(), CV_8UC3);
- for (int r = 0; r < quantized.rows; ++r)
- {
- const uchar* quant_r = quantized.ptr(r);
- cv::Vec3b* color_r = color.ptr<cv::Vec3b>(r);
- for (int c = 0; c < quantized.cols; ++c)
- {
- cv::Vec3b& bgr = color_r[c];
- switch (quant_r[c])
- {
- case 0: bgr[0]= 0; bgr[1]= 0; bgr[2]= 0; break;
- case 1: bgr[0]= 55; bgr[1]= 55; bgr[2]= 55; break;
- case 2: bgr[0]= 80; bgr[1]= 80; bgr[2]= 80; break;
- case 4: bgr[0]=105; bgr[1]=105; bgr[2]=105; break;
- case 8: bgr[0]=130; bgr[1]=130; bgr[2]=130; break;
- case 16: bgr[0]=155; bgr[1]=155; bgr[2]=155; break;
- case 32: bgr[0]=180; bgr[1]=180; bgr[2]=180; break;
- case 64: bgr[0]=205; bgr[1]=205; bgr[2]=205; break;
- case 128: bgr[0]=230; bgr[1]=230; bgr[2]=230; break;
- case 255: bgr[0]= 0; bgr[1]= 0; bgr[2]=255; break;
- default: bgr[0]= 0; bgr[1]=255; bgr[2]= 0; break;
- }
- }
- }
- return color;
- }
- // Adapted from cv_line_template::convex_hull
- void templateConvexHull(const std::vector<cv::linemod::Template>& templates,
- int num_modalities, cv::Point offset, cv::Size size,
- cv::Mat& dst)
- {
- std::vector<cv::Point> points;
- for (int m = 0; m < num_modalities; ++m)
- {
- for (int i = 0; i < (int)templates[m].features.size(); ++i)
- {
- cv::linemod::Feature f = templates[m].features[i];
- points.push_back(cv::Point(f.x, f.y) + offset);
- }
- }
- std::vector<cv::Point> hull;
- cv::convexHull(points, hull);
- dst = cv::Mat::zeros(size, CV_8U);
- const int hull_count = (int)hull.size();
- const cv::Point* hull_pts = &hull[0];
- cv::fillPoly(dst, &hull_pts, &hull_count, 1, cv::Scalar(255));
- }
- void drawResponse(const std::vector<cv::linemod::Template>& templates,
- int num_modalities, cv::Mat& dst, cv::Point offset, int T)
- {
- static const cv::Scalar COLORS[5] = { CV_RGB(0, 0, 255),
- CV_RGB(0, 255, 0),
- CV_RGB(255, 255, 0),
- CV_RGB(255, 140, 0),
- CV_RGB(255, 0, 0) };
- for (int m = 0; m < num_modalities; ++m)
- {
- // NOTE: Original demo recalculated max response for each feature in the TxT
- // box around it and chose the display color based on that response. Here
- // the display color just depends on the modality.
- cv::Scalar color = COLORS[m];
- for (int i = 0; i < (int)templates[m].features.size(); ++i)
- {
- cv::linemod::Feature f = templates[m].features[i];
- cv::Point pt(f.x + offset.x, f.y + offset.y);
- cv::circle(dst, pt, T / 2, color);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement