View difference between Paste ID: E60Rvu9j and
SHOW:
|
|
- or go back to the newest paste.
1 | - | |
/*
 * gpuKernel -- one thread per pixel; each thread stores one float into `aux`.
 *
 * Launch layout: 2D grid of 2D blocks. The thread's x index is the pixel
 * column and its y index is the pixel row. Threads that fall outside the
 * w x h image return without touching memory, so the grid may be rounded up.
 *
 * For a pixel whose (2*rx+1) x (2*ry+1) window lies completely inside the
 * image, the kernel scans the window five times, counting absolute
 * differences between pairs of source pixels into `c`, and stores the running
 * maximum of g0*s. All other in-image pixels receive 0.
 *
 * Parameters:
 *   src         source image; rows are `memPitch` BYTE elements apart
 *   aux         destination; rows are `memPitchAux` bytes apart, float data
 *   memPitch    row stride of `src`
 *   memPitchAux row stride of `aux` in bytes
 *   w, h        image width and height in pixels
 *   Rng         read as a VEC2 whose x/y give the half-sizes rx, ry
 *   sizeRng     not referenced in the visible body
 *   Dir         not referenced in the visible body
 *   sizeDir     not referenced in the visible body
 *
 * NOTE(review): jsB, jsE, isB, isE, jsD, isD, swx, swy, c and s are not
 * declared in the visible code, and g0 is never assigned here; presumably the
 * paste omitted those parts -- confirm against the full source.
 */
__global__ void gpuKernel
(
    BYTE *src, float *aux,      /* source, destination */
    size_t memPitch,            /* memory pitch */
    size_t memPitchAux,
    int w, int h,               /* size of picture */
    BYTE *Rng, int sizeRng,     /* ranges */
    BYTE *Dir, int sizeDir      /* directions */
)
{
    const int rx = ((VEC2*)Rng)->x;   // range of calculations (sizeXofwindow = 2*rx+1)
    const int ry = ((VEC2*)Rng)->y;   // range of calculations (sizeYofwindow = 2*ry+1)

    const int i = blockIdx.x * blockDim.x + threadIdx.x;   // x-coordinate of pixel = column in devmem
    const int j = blockIdx.y * blockDim.y + threadIdx.y;   // y-coordinate of pixel = row in devmem

    // The grid is not guaranteed to match the image size exactly; a thread
    // outside the image must not write, otherwise it lands in another row or
    // past the end of the allocation.
    if ((i >= w) || (j >= h))
        return;

    // Pitched addressing: step to row j in bytes, then index floats.
    float *auxRow = (float*)((char*)aux + (size_t)j * memPitchAux);

    const int i0 = i-rx, i1 = i+rx;   // the range of calculation for columns
    const int j0 = j-ry, j1 = j+ry;   // the range of calculation for rows

    float g = 0.0f, g0 = 0.0f;

    // Only pixels whose whole window is inside the image are evaluated;
    // for the rest g keeps its initial value 0.
    if ((i0 >= 0) && (i1 < w) && (j0 >= 0) && (j1 < h))
    {
        for (int n = 0; n < 5; n++)
        {
            for (int js = jsB; js < swy-jsE; js++)
            {
                for (int is = isB; is < swx-isE; is++)
                {
                    // absolute difference of two source pixels, used as an index into c
                    int ic = (int) fabsf(src[(j0+js)*memPitch+i0+is]-
                                         src[(j0+js+jsD)*memPitch+i0+is+isD]);
                    c[ic] += 1.0f;
                }
            }

            g = fmaxf(g, g0*s);
        }
    }

    // No __syncthreads() here: the original barrier sat inside a branch that
    // only some threads of a block take, and nothing follows it that it could
    // order.
    auxRow[i] = g;
}
50 | ||
/*
 * Host driver (excerpt; the "[..]" lines mark code omitted from the paste).
 *
 * Visible steps: upload the source image to the device, allocate a pitched
 * float buffer for the result, launch gpuKernel, and copy the result back
 * into a tightly packed host buffer.
 */
int main()
{
    [..]
    checkCudaErrors(cudaMemcpy2D(data->BufGPU[0],data->memPitch,   /* to GPU (device) */
                                 data->BufCPU[0],data->imgPitch,   /* from CPU (host) */
                                 data->imgWidth, data->imgHeight,  /* size of data (image) */
                                 cudaMemcpyHostToDevice));
    // cudaThreadSynchronize() is deprecated; use the device-wide sync and check it.
    checkCudaErrors(cudaDeviceSynchronize());

    [..]
    dim3 dimBlock(data->dimBLX,data->dimBLY,1);
    // NOTE(review): both divisions truncate, so the grid does not cover the
    // whole image when the sizes are not multiples of the block dimensions.
    // Rounding up instead is only safe if the kernel skips threads with
    // i >= w or j >= h -- confirm before changing.
    dim3 dimGrid(data->memPitch/dimBlock.x ,data->imgHeight/dimBlock.y,1);

    size_t memPitch = data->memPitch;

    float *auxD;
    size_t auxDPitch;
    size_t auxHPitch;
    checkCudaErrors(cudaMallocPitch((void**)&auxD,&auxDPitch,w*sizeof(float),h));
    // Start from all-zero output so that any pixel the grid does not reach is
    // defined (0.0f) instead of uninitialised device memory.
    checkCudaErrors(cudaMemset2D(auxD,auxDPitch,0,w*sizeof(float),h));

    auxHPitch = w*sizeof(float);            // host rows are tightly packed
    float *auxH = (float *) malloc(h*auxHPitch);
    if (!auxH)
    {
        // host allocation failed: release the device buffer and give up
        cudaFree(auxD);
        return 1;
    }

    gpuKernel<<<dimGrid,dimBlock>>> (data->BufGPU[0],auxD,
                                     memPitch,auxDPitch,w, h,
                                     Rng,sRng,
                                     Dir,sDir);
    checkCudaErrors(cudaGetLastError());        // launch-configuration errors
    checkCudaErrors(cudaDeviceSynchronize());   // errors raised while the kernel ran

    checkCudaErrors(cudaMemcpy2D(auxH,auxHPitch,   // to CPU (host)
                                 auxD,auxDPitch,   // from GPU (device)
                                 auxHPitch, h,     // size of data (image)
                                 cudaMemcpyDeviceToHost));

    checkCudaErrors(cudaDeviceSynchronize());
    [..]
}