Guest User

sum_reduction.cu

a guest
Apr 7th, 2013
186
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 1.49 KB | None | 0 0
  1. // Note: Compiled with
  2. // nvcc -arch=sm_20 sum_reduction.cu
  3. //
  4. // My NVIDIA devices: http://pastebin.com/8yTzXUuK
  5.  
  #include <stdio.h>
  #include <stdlib.h>
  7.  
  // Number of elements in the input vector.
  #define N 512
  // Element type used for the data on both the host and the device.
  // Declared as a typedef rather than a macro so the alias is a real,
  // scoped type name instead of a textual substitution.
  typedef float num_t;
  10.  
  // Largest blockDim.x that psum supports (size of its shared scratch array).
  #define PSUM_MAX_THREADS 256

  // Block-level parallel summation.
  //
  // Each block loads blockDim.x consecutive elements of d_data, starting at
  // index blockIdx.x * blockDim.x, adds them with a tree reduction in shared
  // memory, and thread 0 stores the block's total in d_sum[blockIdx.x].
  //
  // Preconditions (not checked by the kernel):
  //   - 1D launch with blockDim.x <= PSUM_MAX_THREADS;
  //   - d_data holds at least gridDim.x * blockDim.x elements;
  //   - d_sum holds at least gridDim.x elements (one partial sum per block).
  __global__ void psum(num_t *d_sum, num_t *d_data)
  {
    // Statically sized scratch buffer: the launch does not have to supply a
    // dynamic shared-memory size for the kernel to be valid.
    __shared__ num_t temp[PSUM_MAX_THREADS];

    int tid = threadIdx.x;
    temp[tid] = d_data[tid + blockIdx.x * blockDim.x];

    // Halve the number of live partial sums each step. Rounding the half up
    // keeps the middle element when the count is odd, so any block size is
    // reduced correctly; for power-of-two sizes the additions happen in the
    // same order as a plain "d >>= 1" tree.
    for (int active = blockDim.x; active > 1; ) {
      int half = (active + 1) >> 1;
      // Make the previous step's writes (or the initial loads) visible to the
      // whole block. The loop bounds depend only on blockDim.x, so every
      // thread reaches this barrier the same number of times.
      __syncthreads();
      if (tid + half < active) temp[tid] += temp[tid + half];
      active = half;
    }

    // temp[0] was last written by thread 0 itself, so no extra barrier is
    // needed before it publishes the block's result.
    if (tid == 0) d_sum[blockIdx.x] = temp[0];
  }
  25.  
  // Fill x[0] .. x[N-1] with the ramp 0, 1, ..., N-1; each index is stored
  // after conversion to num_t. x must point to at least N elements.
  void init_vec(num_t *x){
    for (int idx = 0; idx != N; ++idx) {
      x[idx] = idx;
    }
  }
  33.  
  34.  
  // Report a failed CUDA runtime call on stdout.
  // Returns 1 when err is an error, 0 when it is cudaSuccess.
  static int report_cuda_error(cudaError_t err, const char *what){
    if (err == cudaSuccess) return 0;
    printf("CUDA ERROR while %s: %s\n", what, cudaGetErrorString(err));
    return 1;
  }

  // Sums the vector 0, 1, ..., N-1 on the GPU (one partial sum per block,
  // combined on the host) and prints it next to a host-computed reference.
  //
  // Exit codes: 0 success, 100 N not divisible by the block size,
  // 101 host allocation failed, 102 device setup failed,
  // 103 kernel launch failed, 104 copying the results back failed.
  int main(void){

    // Launch geometry. psum loads exactly one element per thread and has no
    // bounds check, so blocks * threads must equal N.
    const int threads = 128;
    const int blocks  = N / threads;

    const size_t size       = N * sizeof(num_t);        // input vector
    const size_t sum_bytes  = blocks * sizeof(num_t);   // one partial sum per block
    const size_t smem_bytes = threads * sizeof(num_t);  // one scratch slot per thread

    if (blocks * threads != N){
      printf("N (%d) must be a multiple of the block size (%d)\n", N, threads);
      return 100;
    }

    // Host-side buffers
    num_t *a   = (num_t*)malloc(size);
    num_t *sum = (num_t*)malloc(sum_bytes);
    // Device-side buffers
    num_t *dev_a = NULL, *dev_sum = NULL;
    int rc = 0;

    // Single-pass "loop": break jumps to the shared cleanup code below.
    do {
      if (a == NULL || sum == NULL){
        printf("Host memory allocation failed\n");
        rc = 101;
        break;
      }
      init_vec(a);

      // Copy values from the host to the device
      if (report_cuda_error(cudaMalloc((void**)&dev_a, size),
                            "allocating the device input") ||
          report_cuda_error(cudaMalloc((void**)&dev_sum, sum_bytes),
                            "allocating the device partial sums") ||
          report_cuda_error(cudaMemcpy(dev_a, a, size, cudaMemcpyHostToDevice),
                            "copying the input to the device") ||
          report_cuda_error(cudaMemset(dev_sum, 0, sum_bytes),
                            "clearing the device partial sums")){
        rc = 102;
        break;
      }

      // The third launch argument requests one num_t of dynamic shared memory
      // per thread, so the launch stays valid if psum sizes its scratch
      // buffer through an `extern __shared__` array.
      psum<<<blocks, threads, smem_bytes>>>(dev_sum, dev_a);

      // Launch-configuration errors show up here ...
      if (report_cuda_error(cudaGetLastError(), "executing the kernel")){
        rc = 103;
        break;
      }
      // ... while errors raised during execution surface at this blocking
      // copy, which waits for the kernel to finish.
      if (report_cuda_error(cudaMemcpy(sum, dev_sum, sum_bytes, cudaMemcpyDeviceToHost),
                            "copying the partial sums back")){
        rc = 104;
        break;
      }

      // Combine the per-block partial sums into the final result.
      num_t total = 0;
      for (int b = 0; b < blocks; b++){
        total += sum[b];
      }
      printf("sum (cuda) = %f\n", total);

      // Reference value computed sequentially on the host.
      num_t S = 0;
      for (int j = 0; j < N; j++){
        S += a[j];
      }
      printf("sum (real) = %f\n", S);
    } while (0);

    // Release everything; cudaFree and free both accept NULL.
    cudaFree(dev_sum);
    cudaFree(dev_a);
    free(sum);
    free(a);

    return rc;
  }
Advertisement
Add Comment
Please, Sign In to add comment