Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.37 KB | None | 0 0
  1. #include <cuda_runtime.h>
  2. #include <helper_cuda.h>
  3. #include <iostream>
  4. #include <stdio.h>
  5. #define N 5
  6. __global__ void matrixAdd(int* A, int* B, int* C) {
  7. int i = blockIdx.x * blockDim.x + threadIdx.x;
  8. int j = blockIdx.y * blockDim.y + threadIdx.y;
  9. A[i * N + j] = B[i * N + j] + C[i * N + j];
  10. }
  11.  
  12. int main() {
  13. //Allocate matrix A,B,C for host
  14. int h_A[N][N], h_B[N][N], h_C[N][N];
  15. size_t size = sizeof(int) * N;
  16. //Allocate matrix A,B,C for device
  17. int* d_A, * d_B, * d_C;
  18. cudaMalloc((void**)& d_A, size * N);
  19. cudaMalloc((void**)& d_B, size * N);
  20. cudaMalloc((void**)& d_C, size * N);
  21. //Initializing A,B
  22. for (int i = 0; i < N; i++) {
  23. for (int j = 0; j < N; j++) {
  24. h_A[i][j] = -1;
  25. h_B[i][j] = 1;
  26. }
  27. }
  28. //From device to host memory copy
  29. cudaMemcpy(d_A, h_A, size * N, cudaMemcpyHostToDevice);
  30. cudaMemcpy(d_B, h_B, size * N, cudaMemcpyHostToDevice);
  31. cudaMemcpy(d_C, h_C, size * N, cudaMemcpyHostToDevice);
  32. dim3 blocks = { 2, 2 };
  33. dim3 threads = { 16,16 };
  34. matrixAdd << < blocks, threads >> > (d_A, d_B, d_C);
  35. //From host to device memory copy
  36. cudaMemcpy(h_C, d_C, size * N, cudaMemcpyDeviceToHost);
  37. //Cheking the answer
  38. for (int i = 0; i < N; i++) {
  39. for (int j = 0; j < N; j++) {
  40. std::cout << h_A[i][j] << " + " << h_B[i][j] << " = " << h_C[i][j] << '\n';
  41. }
  42. }
  43.  
  44. cudaFree(d_A);
  45. cudaFree(d_B);
  46. cudaFree(d_C);
  47. return 0;
  48. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement