alexsetyaev

task_2_matrix

Sep 28th, 2021
551
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <iostream>
  2. #include <stdlib.h>
  3. #include <math.h>
  4.  
  // In-place transpose of a square n x n matrix of doubles stored row-major
  // in M (element [row][col] lives at M[row * n + col]).
  //
  // Launch layout: 1D grid of 1D blocks. Threads walk the n*n flat element
  // indices with a grid-wide stride, so any grid/block size yields a complete
  // transpose. No shared memory is used.
  //
  // Only indices strictly below the main diagonal (col < row) swap with their
  // mirror element, so every off-diagonal pair is exchanged exactly once and
  // by a single thread; diagonal elements are left untouched.
  //
  // Parameters:
  //   M - device pointer to n*n doubles, modified in place.
  //   n - matrix dimension; n <= 0 makes the kernel a no-op.
  __global__ void MatrixTrans(double *M, int n){
      // 64-bit indices: n*n and blockIdx.x*blockDim.x can exceed the int range.
      const size_t dim = (n > 0) ? (size_t)n : 0;
      const size_t total = dim * dim;
      const size_t stride = (size_t)gridDim.x * blockDim.x;

      // total == 0 when dim == 0, so the divisions below never see a zero divisor.
      for (size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
           idx < total; idx += stride){
          const size_t row = idx / dim;
          const size_t col = idx % dim;

          if (col < row){
              const size_t mirror = col * dim + row;
              const double tmp = M[idx];
              M[idx] = M[mirror];
              M[mirror] = tmp;
          }
      }
  }
  15.  
  // Prints a diagnostic and terminates the program when a CUDA runtime call
  // reports anything other than cudaSuccess. `what` names the failed step.
  static void checkCuda(cudaError_t status, const char *what){
      if (status != cudaSuccess){
          std::cerr << "CUDA error during " << what << ": "
                    << cudaGetErrorString(status) << std::endl;
          exit(EXIT_FAILURE);
      }
  }

  // Demo driver: builds a 6x6 matrix with element [i][j] = 10*i + j, prints
  // it, transposes it in place on the GPU with MatrixTrans, copies the result
  // back and prints it. Returns 0 on success; exits with a failure status if
  // a host allocation or any CUDA call fails.
  int main(){
      const int n = 6;
      const size_t count = (size_t)n * n;          // number of matrix elements
      const size_t bytes = count * sizeof(double);

      double *h_m = (double*)malloc(bytes);
      double *res_h_m = (double*)malloc(bytes);
      if (h_m == NULL || res_h_m == NULL){
          std::cerr << "Host allocation failed" << std::endl;
          free(h_m);
          free(res_h_m);
          return EXIT_FAILURE;
      }

      // Fill and echo the input matrix.
      for (int i = 0; i < n; ++i){
          for (int j = 0; j < n; ++j){
              h_m[i*n + j] = 10*i + j;
              std::cout << h_m[i*n + j] << " ";
          }
          std::cout<<std::endl;
      }

      double *d_m = NULL;
      checkCuda(cudaMalloc(&d_m, bytes), "cudaMalloc");
      checkCuda(cudaMemcpy(d_m, h_m, bytes, cudaMemcpyHostToDevice),
                "host-to-device copy");

      // The kernel indexes the matrix as a flat array, so the grid has to
      // cover all n*n elements (not just n); round up with a ceiling division.
      const int block_size = 256;
      const int grid_size = (int)((count + block_size - 1) / block_size);

      MatrixTrans<<<grid_size, block_size>>>(d_m, n);
      // A kernel launch returns nothing; configuration errors surface here.
      checkCuda(cudaGetLastError(), "MatrixTrans launch");

      // Blocking copy: waits for the kernel to finish and reports any
      // error raised while it was executing.
      checkCuda(cudaMemcpy(res_h_m, d_m, bytes, cudaMemcpyDeviceToHost),
                "device-to-host copy");

      // Print the transposed matrix.
      for (int i = 0; i < n; ++i){
          for (int j = 0; j < n; ++j){
              std::cout << res_h_m[i*n + j] << " ";
          }
          std::cout<<std::endl;
      }

      checkCuda(cudaFree(d_m), "cudaFree");

      free(h_m);
      free(res_h_m);

      return 0;
  }
  82.  
RAW Paste Data