Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // place the containing folder in your
- // libjacket/examples/ directory
- //
- // Chris McClanahan - 2011
- #include <iostream>
- #include <fstream>
- #include <stdio.h>
- #include <math.h>
- #include <jacket.h>
- #include <jacket_gfx.h>
- #include <cv.h>
- #include <cxcore.h>
- #include <highgui.h>
- #include "opencv2/gpu/gpu.hpp"
- using namespace jkt;
- using namespace std;
- using namespace cv;
- using namespace gpu;
- const int ksz = 32; // side length of the square convolution kernel (ksz x ksz), see the gen(ker, ...) call in main()
- Mat ker; // host-side kernel matrix; filled in main() and then handed to both the OpenCV and LibJacket code paths
- void gen(Mat& mat, int rows, int cols, int type, Scalar low, Scalar high) {
- mat.create(rows, cols, type);
- RNG rng(0);
- rng.fill(mat, RNG::UNIFORM, low, high);
- }
- int main() {
- ginfo();
- Mat src, dst;
- GpuMat d_src, d_dst, d_ker;
- gen(ker, ksz, ksz, CV_32F, -1, 1);
- int runs = 100;
- for (int size = 512; size < 4000; size += 512) {
- cout << "size: " << size << "x" << size << endl;
- gen(src, size, size, CV_32FC1, 0, 1);
- try {
- // OpenCV
- {
- dst.create(size, size, CV_32FC1);
- d_src = src;
- d_dst.create(size, size, CV_32FC1);
- d_ker = ker;
- ConvolveBuf buf;
- convolve(d_src, d_ker, d_dst, false, buf); // Warm up
- int64 t = getTickCount();
- for (int i = 0; i < runs; ++i)
- convolve(d_src, d_ker, d_dst, false, buf);
- cout << " cv-gpu: " << (getTickCount() - t) / getTickFrequency() / (float)runs << endl;
- }
- // LibJacket
- {
- // extract cv image
- Mat jimg;
- src.convertTo(jimg, CV_32FC1);
- float* fgray = (float*)jimg.data;
- f32 I1 = f32(fgray, jimg.rows, jimg.cols);
- unsigned dimsb[] = {ksz, ksz};
- // gpu
- f32 jker = f32((float*)ker.data, ker.rows, ker.cols);
- f32 jdst = conv2(I1, jker, jktConvValid);
- gsync();
- int64 t = getTickCount();
- for (int i = 0; i < runs; ++i) {
- jdst = conv2(I1, jker, jktConvValid);
- gsync();
- }
- cout << " jacket: " << (getTickCount() - t) / getTickFrequency() / (float)runs << endl;
- }
- } catch (gexception& e) {
- cout << e.what() << endl;
- }
- }
- return 0;
- }
- /*
- Results for ksz = 32 (average seconds per convolution call, 100 runs)
- ====================
- Libjacket v1.0.1 (build dd66add) by AccelerEyes
- CUDA Driver: 270.81
- CUDA Toolkit: v4.0
- CUDA capable devices detected:
- GPU0 Tesla C2050 / C2070, 2652 MB, Compute 2.0 (single,double) (in use)
- size: 512x512
- cv-gpu: 0.00202607
- jacket: 0.00790995
- size: 1024x1024
- cv-gpu: 0.00767946
- jacket: 0.0321756
- size: 1536x1536
- cv-gpu: 0.00893694
- jacket: 0.0735304
- size: 2048x2048
- cv-gpu: 0.0171747
- jacket: 0.131005
- size: 2560x2560
- cv-gpu: 0.0177356
- jacket: 0.205862
- size: 3072x3072
- cv-gpu: 0.0278928
- jacket: 0.297097
- size: 3584x3584
- cv-gpu: 0.0268914
- jacket: 0.404806
- Results for ksz = 64 (average seconds per convolution call, 100 runs)
- ====================
- Libjacket v1.0.1 (build dd66add) by AccelerEyes
- CUDA Driver: 270.81
- CUDA Toolkit: v4.0
- CUDA capable devices detected:
- GPU0 Tesla C2050 / C2070, 2652 MB, Compute 2.0 (single,double) (in use)
- size: 512x512
- cv-gpu: 0.0021476
- jacket: 0.00167873
- size: 1024x1024
- cv-gpu: 0.00802897
- jacket: 0.00602738
- size: 1536x1536
- cv-gpu: 0.00899348
- jacket: 0.00626257
- size: 2048x2048
- cv-gpu: 0.0173659
- jacket: 0.0232162
- size: 2560x2560
- cv-gpu: 0.0177426
- jacket: 0.0236838
- size: 3072x3072
- cv-gpu: 0.0264366
- jacket: 0.0242362
- size: 3584x3584
- cv-gpu: 0.0270701
- jacket: 0.0249083
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement