Advertisement
Guest User

Untitled

a guest
Jan 18th, 2013
257
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.39 KB | None | 0 0
  /*
   * gpuKernel - one thread per pixel of a 2D launch (i = column, j = row).
   *
   * For every pixel whose (2*rx+1) x (2*ry+1) window lies completely inside the
   * w x h image, the kernel runs five passes that bin absolute differences of
   * pairs of source samples into c[] and keeps the running maximum
   * g = fmaxf(g, g0*s). That value is stored to aux; every other pixel of the
   * image gets 0. Threads that fall outside the image do nothing.
   *
   * Parameters:
   *   src          source image, addressed as src[row*memPitch + col]
   *   aux          float output image, row stride memPitchAux bytes
   *   memPitch     row stride used to index src
   *   memPitchAux  row stride of aux in bytes (divided by sizeof(float) here)
   *   w, h         image width and height in pixels
   *   Rng          read as a VEC2: x and y are the window half-sizes rx, ry
   *   sizeRng, Dir, sizeDir
   *                not referenced by the visible part of the body
   *
   * NOTE(review): jsB, jsE, isB, isE, swx, swy, jsD, isD, c and s are not
   * declared anywhere in this snippet, and g0 is never assigned after its
   * initialisation - the paste appears to have elided that code. Confirm
   * against the full source before relying on the numeric result.
   */
  __global__ void gpuKernel
  (
    BYTE *src,float *aux, /* source, destination */
    size_t memPitch, /* memory pitch */
    size_t memPitchAux,
    int w,int h, /* size of picture  */
    BYTE *Rng,int sizeRng, /* ranges */
    BYTE *Dir,int sizeDir /* directions */
  )
  {
    const int i = blockIdx.x * blockDim.x + threadIdx.x; // x-coordinate of pixel = column in devmem
    const int j = blockIdx.y * blockDim.y + threadIdx.y; // y-coordinate of pixel = row in devmem

    // The launch grid may contain more threads than there are pixels (the host
    // derives the x extent from the source pitch). Such threads must not touch
    // aux: their index would land in another row or past the allocation.
    if (i >= w || j >= h)
    {
      return;
    }

    const int rx = ((VEC2*)Rng)->x; // range of calculations (sizeXofwindow = 2*rx+1)
    const int ry = ((VEC2*)Rng)->y; // range of calculations (sizeYofwindow = 2*ry+1)

    // Kept in size_t: the pitch arithmetic is already size_t and narrowing it
    // to int could overflow for large images.
    const size_t idx = (size_t)j * memPitchAux / sizeof(float) + (size_t)i;

    const int i0 = i-rx, i1 = i+rx; // the range of calculation for columns
    const int j0 = j-ry, j1 = j+ry; // the range of calculation for rows

    float g = 0.0f, g0 = 0.0f;

    // Only pixels whose whole window is inside the image are evaluated;
    // border pixels keep g == 0.
    if ((i0 >= 0) && (i1 < w) && (j0 >= 0) && (j1 < h))
    {
      for (int n = 0; n < 5; n++)
      {
        for (int js = jsB; js < swy-jsE; js++)
        {
          for (int is = isB; is < swx-isE; is++)
          {
            // absolute difference between a sample and its (isD, jsD) neighbour
            int ic = (int) fabsf(src[(j0+js)*memPitch+i0+is]-
                                 src[(j0+js+jsD)*memPitch+i0+is+isD]);
            c[ic] += 1.0f;
          }
        }

        g = fmaxf(g, g0*s);
      }
    }

    // No __syncthreads() here: the original barrier sat inside the divergent
    // branch above (undefined behaviour when threads of a block disagree on the
    // condition) and nothing after it consumed data from other threads.
    aux[idx] = g;
  }
  50.  
  /*
   * Host driver (excerpt): uploads the source image, allocates a pitched float
   * buffer on the device plus a tightly packed float buffer on the host,
   * launches gpuKernel and copies the result back.
   *
   * The "[..]" markers stand for code that was elided in the original paste;
   * data, w, h, Rng, sRng, Dir and sDir are set up there.
   */
  int main()
  {
    /* [..] */
    checkCudaErrors(cudaMemcpy2D(data->BufGPU[0],data->memPitch, /* to GPU (device) */
                                 data->BufCPU[0],data->imgPitch, /* from CPU (host) */
                                 data->imgWidth, data->imgHeight, /* size of data (image) */
                                 cudaMemcpyHostToDevice));
    // cudaThreadSynchronize() is deprecated; use the device-wide equivalent and
    // check its result so asynchronous errors are not lost.
    checkCudaErrors(cudaDeviceSynchronize());

    /* [..] */
    dim3 dimBlock(data->dimBLX,data->dimBLY,1);
    // NOTE(review): both divisions truncate, so rows/columns beyond the last
    // full block get no thread, and the x extent is taken from the source pitch
    // rather than the image width. Switching to a ceil-div over w and h is only
    // safe if the kernel rejects threads with i >= w or j >= h.
    dim3 dimGrid(data->memPitch/dimBlock.x ,data->imgHeight/dimBlock.y,1);

    size_t memPitch = data->memPitch;

    float *auxD;
    size_t auxDPitch;
    size_t auxHPitch;
    checkCudaErrors(cudaMallocPitch((void**)&auxD,&auxDPitch,w*sizeof(float),h));

    // Host copy is tightly packed: one row is exactly w floats.
    auxHPitch = w*sizeof(float);
    float *auxH = (float *) malloc(h*auxHPitch);
    if (auxH == NULL)
    {
      // Without a host buffer the download below would write through NULL.
      checkCudaErrors(cudaFree(auxD));
      return 1;
    }

    gpuKernel<<<dimGrid,dimBlock>>> (data->BufGPU[0],auxD,
                                     memPitch,auxDPitch,w, h,
                                     Rng,sRng,
                                     Dir,sDir);
    // A launch does not return a status: fetch configuration errors explicitly,
    // then synchronize to surface faults raised while the kernel was running.
    checkCudaErrors(cudaGetLastError());
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaMemcpy2D(auxH,auxHPitch,  // to CPU (host)
                                 auxD,auxDPitch,  // from GPU (device)
                                 auxHPitch, h,    // size of data (image)
                                 cudaMemcpyDeviceToHost));

    checkCudaErrors(cudaDeviceSynchronize());
    /* [..] */
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement