Guest User

Untitled

a guest
Feb 19th, 2018
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.13 KB | None | 0 0
  1. /*
  2. Matt Michels
  3. 10/23/11
  4. lab2 #1a
  5.  
  6. */
  7.  
  8. #include <stdlib.h>
  9. #include <stdio.h>
  10. #include <math.h>
  11.  
  12. #define BLOCK_SIZE 3
  13.  
  14. #define wA 3   // Matrix A width
  15. #define hA 3   // Matrix A height
  16. #define wB 3   // Matrix B width
  17. #define hB wA  // Matrix B height
  18. #define wC wB  // Matrix C width
  19. #define hC hA  // Matrix C height
  20.  
  21.  
  22.  
  // Fills the first `size` entries of `data` with pseudo-random whole numbers
  // in the range 1..10. Values are drawn from rand() one per element, in
  // ascending index order, so the sequence is fixed by the current srand() seed.
  // Nothing is written when `size` is zero or negative.
  void randomInit(float* data, int size)
  {
      int idx = 0;
      while (idx < size)
      {
          const int draw = rand() % 10 + 1;   // 0..9 shifted up to 1..10
          data[idx] = (float) draw;
          ++idx;
      }
  }
  28.  
  // Matrix-multiply kernel: C = A * B, with all three matrices stored
  // row-major in flat float arrays.
  //
  // Launch layout: a 2D grid of 2D blocks, one thread per element of C.
  // The x dimension selects the column of C, the y dimension selects the row.
  //
  // Parameters:
  //   C      - output matrix, widthB values per row
  //   A      - left operand, widthA values per row
  //   B      - right operand, widthB values per row (and widthA rows)
  //   widthA - number of columns in A (the shared inner dimension)
  //   widthB - number of columns in B and in C
  //
  // Precondition: the launch must cover C exactly. The height of A is not
  // passed in, so the kernel cannot guard against threads that fall outside
  // the matrix.
  //
  // The width parameters are deliberately not called wA / wB: those names are
  // file-level macros and would be substituted into the parameter list.
  __global__ void matrixMul( float* C, float* A, float* B, int widthA, int widthB)
  {
      // Global coordinates of the C element owned by this thread.
      const int row = blockIdx.y * blockDim.y + threadIdx.y;
      const int col = blockIdx.x * blockDim.x + threadIdx.x;

      // Dot product of row `row` of A with column `col` of B.
      float value = 0.0f;
      for (int i = 0; i < widthA; ++i)
      {
          value += A[row * widthA + i] * B[i * widthB + col];
      }

      // C has widthB columns, so that is its row stride.
      C[row * widthB + col] = value;
  }
  46.  
  47.  
  48.  
  // Terminates the program with a diagnostic on stderr when a CUDA runtime
  // call did not return cudaSuccess. `what` names the failing operation.
  static void checkCuda(cudaError_t status, const char* what)
  {
      if (status != cudaSuccess)
      {
          fprintf(stderr, "CUDA error during %s: %s\n",
                  what, cudaGetErrorString(status));
          exit(EXIT_FAILURE);
      }
  }

  // Prints `heading` verbatim, then `count` floats from `values`, breaking the
  // line after every `perRow` values.
  static void printMatrix(const char* heading, const float* values,
                          unsigned int count, unsigned int perRow)
  {
      printf("%s", heading);
      for (unsigned int i = 0; i < count; i++)
      {
          printf("%f ", values[i]);
          if (((i + 1) % perRow) == 0)
              printf("\n");
      }
  }

  // Builds two random matrices on the host, multiplies them on the device with
  // matrixMul, and prints the operands and the product.
  // Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA
  // failure.
  int main(int argc, char** argv)
  {
      (void) argc;
      (void) argv;

      srand(1020);

      // The grid below is sized with integer division and the kernel has no
      // bounds guard, so C must split into whole BLOCK_SIZE x BLOCK_SIZE tiles.
      if ((wC % BLOCK_SIZE) != 0 || (hC % BLOCK_SIZE) != 0)
      {
          fprintf(stderr, "Matrix C dimensions must be multiples of BLOCK_SIZE\n");
          return EXIT_FAILURE;
      }

      // Element counts and byte sizes of the three matrices.
      const unsigned int size_A = wA * hA;
      const unsigned int size_B = wB * hB;
      const unsigned int size_C = wC * hC;
      const size_t matrix_size_A = sizeof(float) * size_A;
      const size_t matrix_size_B = sizeof(float) * size_B;
      const size_t matrix_size_C = sizeof(float) * size_C;

      // Host buffers.
      float* h_A = (float*) malloc(matrix_size_A);
      float* h_B = (float*) malloc(matrix_size_B);
      float* h_C = (float*) malloc(matrix_size_C);
      if (h_A == NULL || h_B == NULL || h_C == NULL)
      {
          fprintf(stderr, "Host allocation failed\n");
          free(h_A);
          free(h_B);
          free(h_C);
          return EXIT_FAILURE;
      }

      randomInit(h_A, size_A);
      randomInit(h_B, size_B);

      printMatrix("\n\nMatrix A\n", h_A, size_A, wA);
      printMatrix("\n\nMatrix B\n", h_B, size_B, wB);

      // Device buffers.
      float* d_A = NULL;
      float* d_B = NULL;
      float* d_C = NULL;
      checkCuda(cudaMalloc((void**) &d_A, matrix_size_A), "cudaMalloc(d_A)");
      checkCuda(cudaMalloc((void**) &d_B, matrix_size_B), "cudaMalloc(d_B)");
      checkCuda(cudaMalloc((void**) &d_C, matrix_size_C), "cudaMalloc(d_C)");

      checkCuda(cudaMemcpy(d_A, h_A, matrix_size_A, cudaMemcpyHostToDevice),
                "copy of A to device");
      checkCuda(cudaMemcpy(d_B, h_B, matrix_size_B, cudaMemcpyHostToDevice),
                "copy of B to device");

      dim3 threads(BLOCK_SIZE, BLOCK_SIZE);
      dim3 grid(wC / threads.x, hC / threads.y);

      //   execute the kernel
      matrixMul<<< grid, threads >>>(d_C, d_A, d_B, wA, wB);
      // A kernel launch returns nothing; a bad launch configuration is only
      // visible through cudaGetLastError().
      checkCuda(cudaGetLastError(), "matrixMul launch");

      // This blocking copy waits for the kernel, so its status also reports
      // any fault raised while the kernel was running.
      checkCuda(cudaMemcpy(h_C, d_C, matrix_size_C, cudaMemcpyDeviceToHost),
                "copy of C to host");

      printMatrix("\n\nMatrix C (Results)\n", h_C, size_C, wC);
      printf("\n");

      free(h_A);
      free(h_B);
      free(h_C);
      checkCuda(cudaFree(d_A), "cudaFree(d_A)");
      checkCuda(cudaFree(d_B), "cudaFree(d_B)");
      checkCuda(cudaFree(d_C), "cudaFree(d_C)");

      return 0;
  }
Add Comment
Please, Sign In to add comment