Advertisement
Guest User

Untitled

a guest
May 22nd, 2017
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 0.72 KB | None | 0 0
  #include <stdio.h>
  #include <stdlib.h>
  #include <cuda.h>
  #include <cuda_runtime.h>
  3.  
  /*
   * Multiplies each of the first N elements of `a` by 10, in place.
   * (Despite the name, nothing is incremented.)
   *
   * Parameters:
   *   a - array of at least N floats; it is dereferenced in device code, so it
   *       must point to memory the device can access.
   *   N - number of elements to process; N <= 0 makes the kernel a no-op.
   *
   * Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
   * index and advances by the total number of launched threads, so every
   * element is processed even when the grid holds fewer than N threads.
   * Uses no shared memory and no synchronization.
   */
  __global__ void incr(float *a, int N){
      /* Clamp a negative N to zero so the unsigned comparison below stays a no-op. */
      const size_t count  = N > 0 ? (size_t)N : 0;
      /* Widen before multiplying so the index math cannot wrap in 32 bits. */
      const size_t stride = (size_t)gridDim.x * blockDim.x;
      for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride) {
          a[i] *= 10.0f;
      }
  }
  10.  
  /* Prints a diagnostic and terminates the program if a CUDA runtime call failed. */
  static void check_cuda(cudaError_t status, const char *what)
  {
      if (status != cudaSuccess) {
          fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
          exit(EXIT_FAILURE);
      }
  }

  /*
   * Demo driver: fills a host array with 0..N-1, copies it to the device,
   * runs the `incr` kernel over it, copies the result back and prints one
   * "index: value" line per element.
   *
   * Returns 0 on success; exits with EXIT_FAILURE if the host allocation or
   * any CUDA call fails.
   */
  int main(void)
  {
      float *a_h = NULL, *a_d = NULL;
      const int N = 100;
      const size_t size = N * sizeof(float);

      a_h = (float *)malloc(size);
      if (a_h == NULL) {
          fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)size);
          return EXIT_FAILURE;
      }
      check_cuda(cudaMalloc((void **)&a_d, size), "cudaMalloc");

      for (int i = 0; i < N; ++i) {
          a_h[i] = (float)i;
      }

      check_cuda(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice), "host-to-device copy");

      /* NOTE(review): 4 threads per block is well below the 32-lane warp size;
         harmless for a 100-element demo, worth raising for real workloads. */
      const int block_size = 4;
      /* Ceiling division: enough blocks to give every element its own thread. */
      const int n_blocks = (N + block_size - 1) / block_size;

      incr <<<n_blocks, block_size>>> (a_d, N);
      /* A launch reports no status itself; a bad configuration shows up here. */
      check_cuda(cudaGetLastError(), "incr launch");

      /* This copy's return code is also where errors raised while the kernel
         was executing are reported. */
      check_cuda(cudaMemcpy(a_h, a_d, size, cudaMemcpyDeviceToHost), "device-to-host copy");

      for (int i = 0; i < N; ++i) {
          printf("%d: %3.0f\n", i, a_h[i]);
      }

      free(a_h);
      check_cuda(cudaFree(a_d), "cudaFree");

  #ifdef _WIN32
      /* Keep the console window open on Windows; the command does not exist
         on other platforms, where the call would only emit a shell error. */
      system("Pause");
  #endif
      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement