Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.19 KB | None | 0 0
#include <cuda_runtime.h>
#include <helper_cuda.h>

#include <cstdlib>
#include <iostream>
#include <stdio.h>
  5. #define N 5
  6. __global__ void matrixAdd(int* A, int* B, int* C) {
  7. int i = blockIdx.x * blockDim.x + threadIdx.x;
  8. int j = blockIdx.y * blockDim.y + threadIdx.y;
  9. A[i * N + j] = B[i * N + j] + C[i * N + j];
  10. }
  11.  
  12. int main() {
  13. int h_A[N][N], h_B[N][N], h_C[N][N];
  14. size_t size = sizeof(int) * N;
  15. int* d_A, * d_B, * d_C;
  16. cudaMalloc((void**)& d_A, size * N);
  17. cudaMalloc((void**)& d_B, size * N);
  18. cudaMalloc((void**)& d_C, size * N);
  19. for (int i = 0; i < N; i++) {
  20. for (int j = 0; j < N; j++) {
  21. h_A[i][j] = -1;
  22. h_B[i][j] = 1;
  23. }
  24. }
  25. cudaMemcpy(d_A, h_A, size * N, cudaMemcpyHostToDevice);
  26. cudaMemcpy(d_B, h_B, size * N, cudaMemcpyHostToDevice);
  27. cudaMemcpy(d_C, h_C, size * N, cudaMemcpyHostToDevice);
  28. dim3 blocks = { 2, 2 };
  29. dim3 threads = { 16,16 };
  30. matrixAdd << < blocks, threads >> > (d_A, d_B, d_C);
  31. cudaMemcpy(h_C, d_C, size * N, cudaMemcpyDeviceToHost);
  32.  
  33. for (int i = 0; i < N; i++) {
  34. for (int j = 0; j < N; j++) {
  35. std::cout << h_A[i][j] << " + " << h_B[i][j] << " = " << h_C[i][j] << '\n';
  36. }
  37. }
  38.  
  39. cudaFree(d_A);
  40. cudaFree(d_B);
  41. cudaFree(d_C);
  42. return 0;
  43. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement