Guest User

Untitled

a guest
Mar 24th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.71 KB | None | 0 0
  // gpuPdist: pairwise Euclidean distances between the rows of `in`.
  //
  // `in` is indexed as row-major rows of m floats each; for every pair of rows
  // (i, j) with i, j < n the kernel stores
  //     sqrt( sum_k (in[i*m + k] - in[j*m + k])^2 )
  // at out[i*n + j], so `out` must hold n*n floats and `in` n*m floats.
  //
  // Launch requirements:
  //   * blockDim must be 16x16: the shared tiles are indexed directly with
  //     threadIdx.x / threadIdx.y.
  //   * blockIdx.y selects the 16-row band for i, blockIdx.x the band for j;
  //     a grid of ceil(n/16) x ceil(n/16) blocks covers the whole output.
  //   * Uses only static shared memory (two 16-row float tiles).
  //
  // n and m do not have to be multiples of 16: tile elements that fall outside
  // the input are loaded as 0 in BOTH tiles (so they add (0-0)^2 = 0 to the
  // sum), and threads whose output element lies outside the n x n result skip
  // the store. Such threads still run the whole loop so that every thread of
  // the block reaches each __syncthreads().
  __global__ void
  gpuPdist(float *out, float *in, int n, int m){
    const int TILE = 16;

    __shared__ float Ys[TILE][TILE];
    // One padding column: the store below walks Xs column-wise (transposed),
    // and the extra column reduces shared-memory bank conflicts on that store.
    __shared__ float Xs[TILE][TILE + 1];

    const int tx = threadIdx.x, ty = threadIdx.y;

    // Row of `in` this thread loads into Ys; it is also this thread's output row.
    const int rowY = blockIdx.y * TILE + ty;
    // Row of `in` this thread loads into Xs.
    const int rowX = blockIdx.x * TILE + ty;
    // Output column handled by this thread (a row index of `in` as well).
    const int col = blockIdx.x * TILE + tx;

    const bool rowYValid = rowY < n;
    const bool rowXValid = rowX < n;

    // Offsets are computed in size_t so row * m cannot overflow int on large inputs.
    const size_t offY = (size_t)rowY * m;
    const size_t offX = (size_t)rowX * m;

    float s = 0.0f;

    // Walk the m columns in chunks of TILE.
    for (int k0 = 0; k0 < m; k0 += TILE) {
      const int c = k0 + tx;
      const bool cValid = c < m;

      Ys[ty][tx] = (rowYValid && cValid) ? in[offY + c] : 0.0f;
      // Stored transposed so the inner loop can read Xs[k][tx].
      Xs[tx][ty] = (rowXValid && cValid) ? in[offX + c] : 0.0f;
      __syncthreads();  // both tiles fully written before anyone reads them

      for (int k = 0; k < TILE; k++) {
        const float d = Ys[ty][k] - Xs[k][tx];
        s += d * d;
      }
      __syncthreads();  // all reads done before the next iteration overwrites the tiles
    }

    // Only threads that map to a real output element write a result.
    if (rowYValid && col < n) {
      out[(size_t)rowY * n + col] = sqrtf(s);
    }
  }
Add Comment
Please, Sign In to add comment