Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * gpuKernel - one thread per pixel; writes one float per pixel into aux.
 *
 * Launch layout: 2-D grid of 2-D blocks. Thread (i, j) handles column i,
 * row j. The grid may overhang the image; threads with i >= w or j >= h
 * exit without touching memory.
 *
 * Parameters:
 *   src          8-bit source image, indexed as src[row * memPitch + col]
 *   aux          float destination, rows memPitchAux bytes apart
 *   memPitch     row pitch of src (src is BYTE, so elements == bytes)
 *   memPitchAux  row pitch of aux in bytes
 *   w, h         picture size in pixels
 *   Rng          reinterpreted as VEC2; x/y give the window half-sizes.
 *                It is dereferenced in device code, so it must point to
 *                device-accessible memory.
 *   sizeRng      not used by the visible code
 *   Dir, sizeDir not used by the visible code
 *
 * Pixels whose (2*rx+1) x (2*ry+1) window does not fit inside the picture
 * receive 0.
 *
 * NOTE(review): jsB, jsE, isB, isE, swx, swy, jsD, isD, c and s are not
 * declared anywhere in the visible text, g0 is never updated and the loop
 * variable n is never used -- the paste appears to be abridged. Those
 * statements are kept exactly as posted; confirm against the full source.
 */
__global__ void gpuKernel
(
    BYTE *src, float *aux,      /* source, destination */
    size_t memPitch,            /* memory pitch of src */
    size_t memPitchAux,         /* memory pitch of aux (bytes) */
    int w, int h,               /* size of picture */
    BYTE *Rng, int sizeRng,     /* ranges */
    BYTE *Dir, int sizeDir      /* directions */
)
{
    int rx = ((VEC2*)Rng)->x;   // range of calculations (window width  = 2*rx+1)
    int ry = ((VEC2*)Rng)->y;   // range of calculations (window height = 2*ry+1)
    int i = blockIdx.x * blockDim.x + threadIdx.x;  // x-coordinate of pixel = column in devmem
    int j = blockIdx.y * blockDim.y + threadIdx.y;  // y-coordinate of pixel = row in devmem

    // Threads outside the picture must not write: the grid can be wider
    // than w (and, with a rounded-up grid, taller than h), and an unguarded
    // store would land in another row or past the end of aux.
    if ((i >= w) || (j >= h))
    {
        return;
    }

    // Same arithmetic as before, but carried out in size_t so that large
    // images cannot overflow a 32-bit int.
    size_t idx = (size_t)j * memPitchAux / sizeof(float) + (size_t)i;

    int i0 = i-rx, i1 = i+rx;   // the range of calculation for columns
    int j0 = j-ry, j1 = j+ry;   // the range of calculation for rows
    float g = 0.0f, g0 = 0.0f;

    // Only pixels whose whole window lies inside the picture are evaluated;
    // for all others g keeps its initial value 0.
    if ((i0 >= 0) && (i1 < w) && (j0 >= 0) && (j1 < h))
    {
        for (int n = 0; n < 5; n++)
        {
            for (int js = jsB; js < swy-jsE; js++)
            {
                for (int is = isB; is < swx-isE; is++)
                {
                    // absolute difference between a window pixel and its
                    // neighbour displaced by (isD, jsD), used as a bin index
                    int ic = (int) fabsf(src[(j0+js)*memPitch+i0+is]-
                                         src[(j0+js+jsD)*memPitch+i0+is+isD]);
                    c[ic] += 1.0f;
                }
            }
            g = fmaxf(g, g0*s);
        }
    }

    // Single store for both cases. The former __syncthreads() was removed:
    // it sat inside a thread-dependent branch (undefined behaviour, possible
    // hang) and, being the last statement on that path, ordered nothing.
    aux[idx] = g;
}
/*
 * Host driver (abridged in the original paste; "[..]" marks code that the
 * poster left out). Visible steps:
 *   1. upload the 8-bit image to the pitched device buffer data->BufGPU[0]
 *   2. allocate a pitched float buffer auxD on the device and a tightly
 *      packed float buffer auxH on the host
 *   3. launch gpuKernel and copy the result back into auxH
 *
 * NOTE(review): data, w, h, Rng, sRng, Dir and sDir come from the elided
 * code. gpuKernel dereferences Rng on the device, so Rng must be a
 * device-accessible pointer -- confirm.
 */
int main()
{
    /* [..] -- code elided in the original paste */
    checkCudaErrors(cudaMemcpy2D(data->BufGPU[0], data->memPitch,   /* to GPU (device) */
                                 data->BufCPU[0], data->imgPitch,   /* from CPU (host) */
                                 data->imgWidth, data->imgHeight,   /* size of data (image) */
                                 cudaMemcpyHostToDevice));
    /* cudaThreadSynchronize() is deprecated; use the device-wide sync and
       check its result so asynchronous errors are not lost. */
    checkCudaErrors(cudaDeviceSynchronize());
    /* [..] -- code elided in the original paste */

    dim3 dimBlock(data->dimBLX, data->dimBLY, 1);
    /* NOTE(review): the grid spans the source pitch in x and rounds the
       height down in y, so trailing rows are not covered when imgHeight is
       not a multiple of dimBLY. Rounding up requires the kernel to ignore
       threads with i >= w or j >= h, so the geometry is left as posted and
       the output is zero-filled below instead. */
    dim3 dimGrid(data->memPitch/dimBlock.x, data->imgHeight/dimBlock.y, 1);
    size_t memPitch = data->memPitch;

    float *auxD;
    size_t auxDPitch;
    size_t auxHPitch;
    checkCudaErrors(cudaMallocPitch((void**)&auxD, &auxDPitch, w*sizeof(float), h));

    /* Pixels that no thread writes would otherwise be copied back
       uninitialised; 0 matches the value the kernel stores for pixels
       whose window does not fit inside the picture. */
    checkCudaErrors(cudaMemset2D(auxD, auxDPitch, 0, w*sizeof(float), h));

    auxHPitch = w*sizeof(float);
    float *auxH = (float *) malloc(h*auxHPitch);
    if (auxH == NULL)
    {
        /* host allocation failed: release the device buffer and give up */
        cudaFree(auxD);
        return 1;
    }

    gpuKernel<<<dimGrid,dimBlock>>> (data->BufGPU[0], auxD,
                                     memPitch, auxDPitch, w, h,
                                     Rng, sRng,
                                     Dir, sDir);
    checkCudaErrors(cudaGetLastError());        /* invalid launch configuration */
    checkCudaErrors(cudaDeviceSynchronize());   /* faults raised while the kernel ran */

    checkCudaErrors(cudaMemcpy2D(auxH, auxHPitch,   // to CPU (host)
                                 auxD, auxDPitch,   // from GPU (device)
                                 auxHPitch, h,      // size of data (image)
                                 cudaMemcpyDeviceToHost));
    checkCudaErrors(cudaDeviceSynchronize());
    /* [..] -- code elided in the original paste */
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement