Advertisement
Guest User

Untitled

a guest
Jun 21st, 2013
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.30 KB | None | 0 0
  // Block-level integer sum reduction.
  //
  // Every block accumulates a strided slice of g_idata and writes exactly one
  // partial sum to g_odata[blockIdx.x]. The caller is responsible for combining
  // the gridDim.x partial sums (on the host or with a second launch).
  //
  // Template parameter:
  //   blockSize - threads per block. Must equal blockDim.x at launch and must
  //               be a power of two: the halving tree below pairs element tid
  //               with element tid + half and would skip elements otherwise.
  // Parameters:
  //   g_odata - device-accessible output, at least gridDim.x ints.
  //   g_idata - device-accessible input, at least n ints.
  //   n       - number of input elements; may be 0 and does not have to be a
  //             multiple of 2 * blockSize.
  // Shared memory: blockSize * sizeof(int) bytes, statically allocated.
  //
  // Notes on this revision:
  //   * The second parameter is named g_idata (it was a duplicate g_odata,
  //     which does not compile and left g_idata undeclared).
  //   * Partial sums live in a register and then in __shared__ memory instead
  //     of g_odata[tid]; the global scratch was shared by all blocks, so blocks
  //     corrupted each other's partials and the final per-block store raced
  //     with them.
  //   * The second load of each pair is bounds-checked, so the tail of the
  //     input is no longer read out of range.
  //   * Every tree step is followed by a block barrier, so the kernel does not
  //     depend on implicit warp-synchronous execution.
  template <unsigned int blockSize>
  __global__ void reduce4(int *g_odata, int *g_idata, unsigned int n) {
      __shared__ int sdata[blockSize];

      const unsigned int tid = threadIdx.x;

      // 64-bit indices: blockIdx.x * blockSize * 2 and the running index can
      // exceed 32 bits for large grids / large n and would silently wrap.
      const unsigned long long stride =
          (unsigned long long)gridDim.x * (blockSize * 2u);
      unsigned long long i =
          (unsigned long long)blockIdx.x * (blockSize * 2u) + tid;

      // Reduce multiple elements per thread: two loads per iteration, the
      // second one guarded because i + blockSize may fall past the end.
      int partial = 0;
      while (i < n) {
          partial += g_idata[i];
          const unsigned long long j = i + blockSize;
          if (j < n) {
              partial += g_idata[j];
          }
          i += stride;
      }

      sdata[tid] = partial;
      __syncthreads();

      // Tree reduction in shared memory. blockSize is a compile-time constant,
      // so the trip count is uniform across the block and the barrier sits
      // outside the divergent `tid < half` branch.
      for (unsigned int half = blockSize / 2; half > 0; half >>= 1) {
          if (tid < half) {
              sdata[tid] += sdata[tid + half];
          }
          __syncthreads();
      }

      // One result per block.
      if (tid == 0) {
          g_odata[blockIdx.x] = sdata[0];
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement