Advertisement
rainman002

Untitled

Jan 9th, 2016
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 4.66 KB | None | 0 0
  1. /*
  2.  ============================================================================
  3.  Name        : RGB.cu
  4.  Author      : kyle
  5.  Version     :
  6.  Copyright   : poop
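  7.  Description : CUDA greedy pixel placement - scores empty pixels against a color, fills the best-scoring one, and writes the image to a PNG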
  8.  ============================================================================
  9.  */
  10.  
  #include <iostream>
  #include <numeric>
  #include <stdint.h>
  #include <stdlib.h>
  #include <time.h>
  #include "lodepng.h"
  15.  
  /*
   * Report a failed CUDA runtime call and terminate the process.
   *
   * file, line : source location of the call (filled in by CUDA_CHECK_RETURN)
   * statement  : text of the expression that was evaluated
   * err        : status the expression produced
   *
   * Does nothing when err is cudaSuccess. Otherwise prints the statement, the
   * error string, the numeric code and the location to stderr and calls exit(1).
   */
  static void CheckCudaErrorAux (const char *file, unsigned line, const char *statement, cudaError_t err)
  {
      if (err != cudaSuccess)
      {
          std::cerr << statement << " returned " << cudaGetErrorString(err)
                    << "(" << err << ") at " << file << ":" << line << std::endl;
          exit (1);
      }
  }

  /* Wrap a CUDA runtime call so that a failure is reported with its call site. */
  #define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__,__LINE__, #value, value)
  23.  
  24.  
  25.  
  /* Image geometry shared by host and device code. */
  static const int w    = 1 << 12;   /* row length in pixels; also used as the image height */
  static const int l    = w * w;     /* total pixel count (1 << 24) */
  static const int mask = l - 1;     /* wraps a pixel index into [0, l) */
  29.  
  /*
   * Helper macros. All of them are GNU statement expressions, so each argument
   * is evaluated exactly once.
   */

  /* Larger of a and b. */
  #define MAX(a,b) \
     ({ __typeof__ (a) _a = (a); \
        __typeof__ (b) _b = (b); \
        _a > _b ? _a : _b; })

  /* Smaller of a and b. */
  #define MIN(a,b) \
     ({ __typeof__ (a) _a = (a); \
        __typeof__ (b) _b = (b); \
        _a < _b ? _a : _b; })

  /* Absolute value of a. */
  #define ABS(a) \
      ({ __typeof__ (a) _a = (a); \
      _a < 0 ? -_a : _a; })

  /*
   * Squared distance between two packed colors. c1 and c2 point to ints; the
   * bytes at bit offsets 16, 8 and 0 are treated as the r, g and b channels
   * and the top byte is ignored.
   */
  #define DIFF(c1,c2) \
     ({ int _c1 = *(c1); int _c2 = *(c2); \
        int _r = (_c1>>16 & 0xff) - (_c2>>16 & 0xff); \
        int _g = (_c1>>8  & 0xff) - (_c2>>8  & 0xff); \
        int _b = (_c1>>0  & 0xff) - (_c2>>0  & 0xff); \
        (_r*_r+_g*_g+_b*_b)  ; })

  /*
   * Same distance as DIFF, computed by viewing each int as four bytes. The
   * x/y/z members are the three lowest-addressed bytes, which are the low
   * three bytes of the int on a little-endian target.
   *
   * The channels are read as unsigned bytes and subtracted as ints. Doing the
   * arithmetic in signed char (char4) wraps for differences above 127, e.g.
   * 255 - 0 became -1, so the result disagreed with DIFF.
   */
  #define DIFF2(c1,c2) \
     ({ uchar4 _c1 = *(uchar4*)(c1); uchar4 _c2 = *(uchar4*)(c2); \
        int _dx = (int)_c1.x - (int)_c2.x; \
        int _dy = (int)_c1.y - (int)_c2.y; \
        int _dz = (int)_c1.z - (int)_c2.z; \
        _dx*_dx + _dy*_dy + _dz*_dz; })
  55.  
  /*
   * scoreKernel - rate every still-empty pixel as a destination for `color`.
   *
   * One thread handles one pixel index. A pixel counts as empty when the top
   * byte of its value is zero. For an empty pixel the kernel looks at its four
   * diagonal neighbours (index +/-1 +/-w, wrapped into range with `mask`) and,
   * among the neighbours whose top byte is set, takes the smallest DIFF()
   * against `color`. That minimum is written back into the pixel itself; when
   * no neighbour has its top byte set the value written is 0x00ffffff.
   * Pixels whose top byte is already set are not modified.
   *
   * gpu_img : device buffer; indices 0..l-1 are read and written here.
   * color   : candidate color, compared via DIFF() on its low three bytes.
   *
   * Launch: 1-D grid, at least l threads in total.
   */
  __global__ void scoreKernel(int* gpu_img, int color)
  {
      const uint64_t pixel = (blockIdx.x*blockDim.x+threadIdx.x);

      // Surplus threads in the last block have no pixel.
      if (pixel >= l)
          return;

      // Top byte set: leave the pixel alone.
      if ((gpu_img[pixel] & 0xff000000) != 0)
          return;

      int best = 0x00ffffff;

      // Visit (+w,+1), (+w,-1), (-w,+1), (-w,-1). The sums are formed in
      // unsigned 64-bit arithmetic, so negative steps wrap and `& mask`
      // brings the result back into [0, l).
      for (int rowStep = w; rowStep >= -w; rowStep -= 2 * w)
      {
          for (int colStep = 1; colStep >= -1; colStep -= 2)
          {
              const int neighbour = (pixel + colStep + rowStep) & mask;
              if (gpu_img[neighbour] & 0xff000000)
                  best = MIN(DIFF(&gpu_img[neighbour], &color), best);
          }
      }

      gpu_img[pixel] = best;
  }
  79.  
  /*
   * minKernel - first reduction stage: one minimum per image column.
   *
   * Thread c (for c < w) walks the indices c, c+w, c+2w, ... below l and, among
   * entries whose top byte is zero, remembers the smallest value under
   * 0xffffff and the index where it was first seen. The pair is stored at the
   * index where the walk stops and at w entries beyond it; because l is a
   * multiple of w that is gpu_img[l + c] (value) and gpu_img[l + w + c]
   * (index). When nothing qualifies, the stored pair is (0xffffff, 0).
   *
   * gpu_img : device buffer of at least l + 2*w ints.
   *
   * Launch: 1-D grid, at least w threads in total.
   */
  __global__ void minKernel(int* gpu_img)
  {
      uint64_t pos = (blockIdx.x*blockDim.x+threadIdx.x);
      if (pos >= w)
          return;

      int bestValue = 0xffffff;
      int bestIndex = 0;

      for (; pos < l; pos += w)
      {
          const int value = gpu_img[pos];

          // Skip entries with the top byte set and anything not strictly better.
          if ((value & 0xff000000) != 0 || value >= bestValue)
              continue;

          bestValue = value;
          bestIndex = pos;
      }

      // `pos` is now the first index at or past l reached by this thread's walk.
      gpu_img[pos] = bestValue;
      gpu_img[pos + w] = bestIndex;
  }
  /*
   * placeKernel - final reduction stage and placement.
   *
   * Only the thread with global index 0 does any work. It scans the w
   * (value, index) pairs stored at gpu_img[l .. l+w-1] and
   * gpu_img[l+w .. l+2w-1], takes the first pair with the smallest value
   * below 0xffffff, and writes `color` with the top byte forced to 0xff at
   * that index. If no value is below 0xffffff the index stays 0, so pixel 0
   * is the one written.
   *
   * gpu_img : device buffer of at least l + 2*w ints.
   * color   : color to store; its top byte is replaced by 0xff.
   */
  __global__ void placeKernel(int* gpu_img, int color)
  {
      if ((blockIdx.x*blockDim.x+threadIdx.x) != 0)
          return;

      const int* columnValue = gpu_img + l;       // per-column minimum values
      const int* columnIndex = columnValue + w;   // indices those minima came from

      int bestValue = 0xffffff;
      int target = 0;
      for (int column = 0; column < w; ++column)
      {
          if (columnValue[column] < bestValue)
          {
              bestValue = columnValue[column];
              target = columnIndex[column];
          }
      }

      gpu_img[target] = 0xff000000 | color;
  }
  120.  
  121. // ----------
  122.  
  /*
   * Number of blocks of `blockSize` threads needed to cover `length` items:
   * length / blockSize, rounded up for the non-negative inputs used here.
   */
  int blockCount(int length, int blockSize)
  {
      const int padded = length + blockSize - 1;
      return padded / blockSize;
  }
  127.  
  /*
   * Stopwatch on top of clock(): returns the number of clock() ticks since the
   * previous call (since tick 0 on the first call) and restarts the interval.
   * The previous reading is kept in a function-local static.
   */
  int dt()
  {
      static uint64_t previous = 0;

      const uint64_t now = clock();
      const int elapsed = (int)(now - previous);
      previous = now;
      return elapsed;
  }
  136.  
  /*
   * Place one color into the device image.
   *
   * Issues three kernels back to back with no stream argument:
   *   1. scoreKernel - one thread per pixel, scores the empty pixels for `color`
   *   2. minKernel   - one thread per column, reduces the scores to w candidates
   *   3. placeKernel - a single thread picks the best candidate and fills it
   *
   * gpu_img : device buffer of at least l + 2*w ints; the 2*w ints after the
   *           image are scratch space for the reduction.
   * color   : color to place.
   *
   * The function does not wait for the kernels to finish. Each launch is
   * followed by cudaGetLastError() so an invalid launch is reported at its
   * call site (CUDA_CHECK_RETURN terminates the process) instead of being
   * silently ignored. Faults raised while a kernel runs still surface at the
   * caller's next synchronizing CUDA call.
   */
  void placeColorGpu(int* gpu_img, int color)
  {
      static const int BLOCK_SIZE = 256;

      int bCount = blockCount(l, BLOCK_SIZE);
      scoreKernel<<<bCount, BLOCK_SIZE>>> (gpu_img, color);
      CUDA_CHECK_RETURN(cudaGetLastError());

      bCount = blockCount(w, BLOCK_SIZE);
      minKernel<<<bCount, BLOCK_SIZE>>> (gpu_img);
      CUDA_CHECK_RETURN(cudaGetLastError());

      placeKernel<<<1,1>>> (gpu_img, color);
      CUDA_CHECK_RETURN(cudaGetLastError());
  }
  149.  
  /*
   * Program entry point.
   *
   * Builds an empty w x w image with one pre-filled pixel, uploads it, places
   * colors 1..n-1 one at a time with placeColorGpu(), downloads the result and
   * writes it to /tmp/cody.png through lodepng.
   *
   * Returns 0 on success and 1 when the PNG encoder reports an error. CUDA
   * failures terminate the process inside CUDA_CHECK_RETURN.
   */
  int main(void)
  {
      int *img = new int[l];
      int *colors = new int[l];

      int *gpu_img;

      // Color i is the integer i itself; the host image starts out all zero.
      for (int i=0 ; i<l ; ++i)
      {
          colors[i] = i;
          img[i] = 0;
      }

      // Seed pixel at row w/2, column w/2: top byte set, color bytes zero.
      img[l/2+w/2] = 0xff000000;

      // The device buffer holds the image plus 2*w ints of reduction scratch.
      CUDA_CHECK_RETURN(cudaMalloc((void **)&gpu_img, sizeof(int)*(l+2*w)));
      CUDA_CHECK_RETURN(cudaMemcpy(gpu_img, img, sizeof(int)*l, cudaMemcpyHostToDevice));

  //  int a = 0x01020304;
  //  int b = 0x05050505;
  //
  //  std::cout << "diff2" << DIFF2(&a ,&b) << std::endl;

      // clock() ticks per second; avoids assuming the value is 1e6.
      const double ticksPerSecond = CLOCKS_PER_SEC;

      dt();
      uint64_t tic = 0, toc = clock();
      int ticki = 0;
      int n = 500; // set to l for full image.
      for(int i=1 ; i<n ; i++)
      {
          placeColorGpu(gpu_img, colors[i]);

          // About once per second of clock() time: progress and placements/sec.
          if((tic = clock()) - toc > ticksPerSecond)
          {
              std::cout << (i*100.0/n) << "% pps " << (i-ticki)*ticksPerSecond/(tic-toc) << std::endl;
              ticki = i;
              toc = tic;
          }
      }

      // Kernel launches return before the GPU has finished. Wait here so the
      // reported time covers the actual work, and so any fault raised while a
      // kernel was running is reported now.
      CUDA_CHECK_RETURN(cudaDeviceSynchronize());
      std::cout << "time" << ": " << dt()/ticksPerSecond << std::endl;

      CUDA_CHECK_RETURN(cudaMemcpy(img, gpu_img, sizeof(int)*l, cudaMemcpyDeviceToHost));
      CUDA_CHECK_RETURN(cudaFree(gpu_img));

      char filename[128] = "/tmp/cody.png";
      int status = lodepng_encode32_file(filename, (unsigned char*)img, w, w);

      std::cout << "png output \"" << filename << "\" status " << status << std::endl;

      delete[] img;
      delete[] colors;

      // Propagate an encoder failure through the exit status.
      return status == 0 ? 0 : 1;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement