Advertisement
Guest User

Untitled

a guest
Jan 22nd, 2019
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.95 KB | None | 0 0
  1. #include <iostream>
  2. #include <math.h>
  // Kernel: element-wise, in-place addition  y[i] = x[i] + y[i]  for 0 <= i < n.
  //
  // Launch layout: any 1D grid of 1D blocks. Each thread starts at its global
  // index and advances by the total number of launched threads, so every
  // element is handled by exactly one thread regardless of grid/block size
  // (a <<<1, 1>>> launch degenerates to a plain sequential loop).
  //
  // Parameters:
  //   n - number of elements to process; n <= 0 performs no work.
  //   x - input array, at least n floats, readable from the device.
  //   y - input/output array, at least n floats, readable and writable from
  //       the device; receives the sums.
  //
  // Uses no shared memory.
  __global__
  void add(int n, float *x, float *y)
  {
    // 64-bit arithmetic so that neither the start index nor "i += stride"
    // can overflow when n is close to INT_MAX or the grid is very large.
    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride)
      y[i] = x[i] + y[i];
  }
  10.  
  // Returns true when `status` is cudaSuccess; otherwise prints the failing
  // step (`what`) and the CUDA error string to stderr and returns false.
  static bool cudaOk(cudaError_t status, const char *what)
  {
    if (status == cudaSuccess)
      return true;
    std::cerr << "CUDA error (" << what << "): "
              << cudaGetErrorString(status) << std::endl;
    return false;
  }

  // Fills two 1M-element arrays with 1.0f and 2.0f, runs the add kernel on
  // them, and prints the maximum deviation of the result from 3.0f.
  //
  // Returns 0 when every CUDA call succeeded, 1 otherwise.
  int main(void)
  {
    const int N = 1 << 20;
    float *x = NULL;
    float *y = NULL;

    // Allocate Unified Memory - accessible from CPU or GPU.
    // A failed allocation must not fall through to the host-side writes below.
    if (!cudaOk(cudaMallocManaged(&x, N * sizeof(float)), "cudaMallocManaged(x)") ||
        !cudaOk(cudaMallocManaged(&y, N * sizeof(float)), "cudaMallocManaged(y)")) {
      // cudaFree accepts a null pointer, so this is safe whichever call failed.
      cudaFree(x);
      cudaFree(y);
      return 1;
    }

    // Initialize x and y arrays on the host.
    for (int i = 0; i < N; i++) {
      x[i] = 1.0f;
      y[i] = 2.0f;
    }

    // Run the kernel over all N elements with a single block of a single
    // thread. NOTE: a larger grid/block size is only valid if the kernel
    // partitions the index range by thread index.
    add<<<1, 1>>>(N, x, y);

    // A kernel launch returns no status itself: launch-configuration errors
    // are reported by cudaGetLastError(), execution errors by the next
    // synchronizing call.
    bool ok = cudaOk(cudaGetLastError(), "add kernel launch");

    // Wait for the GPU to finish before accessing the results on the host.
    ok = cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize") && ok;

    if (ok) {
      // Check for errors (all values should be 3.0f).
      float maxError = 0.0f;
      for (int i = 0; i < N; i++)
        maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
      std::cout << "Max error: " << maxError << std::endl;
    }

    // Free memory (also on the failure path, so nothing leaks).
    ok = cudaOk(cudaFree(x), "cudaFree(x)") && ok;
    ok = cudaOk(cudaFree(y), "cudaFree(y)") && ok;

    return ok ? 0 : 1;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement