Advertisement
alexeyspizhevoy

Untitled

Oct 14th, 2011
809
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.72 KB | None | 0 0
  1. // place the containing folder in your
  2. //  libjacket/examples/ directory
  3. //
  4. // Chris McClanahan - 2011
  5.  
  6. #include <iostream>
  7. #include <fstream>
  8. #include <stdio.h>
  9. #include <math.h>
  10. #include <jacket.h>
  11. #include <jacket_gfx.h>
  12. #include <cv.h>
  13. #include <cxcore.h>
  14. #include <highgui.h>
  15. #include "opencv2/gpu/gpu.hpp"
  16.  
  17.  
  18.  
  19. using namespace jkt;
  20. using namespace std;
  21. using namespace cv;
  22. using namespace gpu;
  23.  
  24.  
  25. const int ksz = 32;
  26. Mat ker;
  27.  
  28. void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high) {
  29.     mat.create(rows, cols, type);
  30.     RNG rng(0);
  31.     rng.fill(mat, RNG::UNIFORM, low, high);
  32. }
  33.  
  34. int main() {
  35.     ginfo();
  36.  
  37.     Mat src, dst;
  38.     GpuMat d_src, d_dst, d_ker;
  39.     gen(ker, ksz, ksz, CV_32F, -1, 1);
  40.     int runs = 100;
  41.  
  42.     for (int size = 512; size < 4000; size += 512) {
  43.         cout  << "size: " << size << "x" << size << endl;
  44.         gen(src, size, size, CV_32FC1, 0, 1);
  45.  
  46.         try {
  47.             // OpenCV
  48.             {
  49.                 dst.create(size, size, CV_32FC1);
  50.                 d_src = src;
  51.                 d_dst.create(size, size, CV_32FC1);
  52.                 d_ker = ker;
  53.  
  54.                 ConvolveBuf buf;
  55.                 convolve(d_src, d_ker, d_dst, false, buf); // Warm up
  56.                 int64 t = getTickCount();
  57.                 for (int i = 0; i < runs; ++i)
  58.                     convolve(d_src, d_ker, d_dst, false, buf);
  59.                 cout << "  cv-gpu: " << (getTickCount() - t) / getTickFrequency() / (float)runs << endl;
  60.             }
  61.  
  62.             // LibJacket
  63.             {
  64.                 // extract cv image
  65.                 Mat jimg;
  66.                 src.convertTo(jimg, CV_32FC1);
  67.                 float* fgray = (float*)jimg.data;
  68.                 f32 I1 = f32(fgray, jimg.rows, jimg.cols);
  69.                 unsigned dimsb[] = {ksz, ksz};
  70.  
  71.                 // gpu
  72.                 f32 jker = f32((float*)ker.data, ker.rows, ker.cols);
  73.                 f32 jdst = conv2(I1, jker, jktConvValid);                
  74.                 gsync();
  75.                 int64 t = getTickCount();
  76.                 for (int i = 0; i < runs; ++i) {
  77.                     jdst = conv2(I1, jker, jktConvValid);
  78.                     gsync();
  79.                 }
  80.                 cout << "  jacket: " << (getTickCount() - t) / getTickFrequency() / (float)runs << endl;
  81.             }
  82.  
  83.         } catch (gexception& e) {
  84.             cout << e.what() << endl;
  85.         }
  86.     }
  87.  
  88.     return 0;
  89. }
  90.  
  91. /*
  92. Results for ksz = 32 (average seconds per convolution call)
  93. ====================
  94. Libjacket v1.0.1 (build dd66add) by AccelerEyes
  95. CUDA Driver: 270.81
  96. CUDA Toolkit: v4.0
  97.  
  98. CUDA capable devices detected:
  99. GPU0 Tesla C2050 / C2070, 2652 MB, Compute 2.0 (single,double) (in use)
  100. size: 512x512
  101.   cv-gpu: 0.00202607
  102.   jacket: 0.00790995
  103. size: 1024x1024
  104.   cv-gpu: 0.00767946
  105.   jacket: 0.0321756
  106. size: 1536x1536
  107.   cv-gpu: 0.00893694
  108.   jacket: 0.0735304
  109. size: 2048x2048
  110.   cv-gpu: 0.0171747
  111.   jacket: 0.131005
  112. size: 2560x2560
  113.   cv-gpu: 0.0177356
  114.   jacket: 0.205862
  115. size: 3072x3072
  116.   cv-gpu: 0.0278928
  117.   jacket: 0.297097
  118. size: 3584x3584
  119.   cv-gpu: 0.0268914
  120.   jacket: 0.404806
  121.  
  122. Results for ksz = 64 (average seconds per convolution call)
  123. ====================
  124. Libjacket v1.0.1 (build dd66add) by AccelerEyes
  125. CUDA Driver: 270.81
  126. CUDA Toolkit: v4.0
  127.  
  128. CUDA capable devices detected:
  129. GPU0 Tesla C2050 / C2070, 2652 MB, Compute 2.0 (single,double) (in use)
  130. size: 512x512
  131.   cv-gpu: 0.0021476
  132.   jacket: 0.00167873
  133. size: 1024x1024
  134.   cv-gpu: 0.00802897
  135.   jacket: 0.00602738
  136. size: 1536x1536
  137.   cv-gpu: 0.00899348
  138.   jacket: 0.00626257
  139. size: 2048x2048
  140.   cv-gpu: 0.0173659
  141.   jacket: 0.0232162
  142. size: 2560x2560
  143.   cv-gpu: 0.0177426
  144.   jacket: 0.0236838
  145. size: 3072x3072
  146.   cv-gpu: 0.0264366
  147.   jacket: 0.0242362
  148. size: 3584x3584
  149.   cv-gpu: 0.0270701
  150.   jacket: 0.0249083
  151. */
  152.  
  153.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement