Guest User

Untitled

a guest
Aug 6th, 2016
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.17 KB | None | 0 0
  1. #include "stdafx.h"
  2. #include <amp.h>
  3. #include <iostream>
  4. #include <ctime>
  5. #include <omp.h>
  6. #include <iomanip>
  7.  
  8. #define N 2048
  9. #define BLOCK 16
  10. using namespace concurrency;
  11.  
  12. std::vector <std::vector <float> > cA(N, std::vector<float>(N));
  13. std::vector <std::vector <float> > cB(N, std::vector<float>(N)); //Не работает нихуя на AMP
  14. std::vector <std::vector <float> > cC(N, std::vector<float>(N));
  15.  
  16. std::vector <float> A(N * N);
  17. std::vector <float> B(N * N); //А вот это заебись
  18. std::vector <float> C(N * N);
  19.  
  20.  
  21.  
  22.  
  23. void createMatrix()
  24. {
  25.     srand(time(0));
  26.     int index = 0;
  27.     for (int i = 0; i < N; i++)
  28.     {
  29.         for (int j = 0; j < N; j++, index++)
  30.         {
  31.             cA[i][j] = rand() % 100 + 1;
  32.             cB[i][j] = rand() % 100 + 1;
  33.             A[index] = cA[i][j];
  34.             B[index] = cB[i][j];
  35.         }
  36.     }
  37. }
  38.  
  39. void Math_CPU()
  40. {
  41.     float sum;
  42.     for (int l = 0; l < N; l++)
  43.     {
  44.         for (int i = 0; i < N; i++)
  45.         {
  46.             sum = 0;
  47.             for (int j = 0; j < N; j++)
  48.             {
  49.                 sum += cA[l][j] * cB[j][i];
  50.             }
  51.             cC[i][l] = sum;
  52.         }
  53.     }
  54. }
  55.  
  56. void Math_OMP()
  57. {
  58.     float sum;
  59. #pragma omp parallel for reduction(+:sum)
  60.     for (int l = 0; l < N; l++)
  61.     {
  62.         for (int i = 0; i < N; i++)
  63.         {
  64.             sum = 0;
  65.             for (int j = 0; j < N; j++)
  66.             {
  67.                 sum += cA[l][j] * cB[j][i];
  68.             }
  69.             cC[i][l] = sum;
  70.         }
  71.     }
  72. }
  73.  
  74.  
  75. void Math_AMP()
  76. {
  77.     array_view<float, 2> Matrix_A(N, N, A), Matrix_B(N, N, B), Matrix_C(N, N, C);
  78.     Matrix_C.discard_data();
  79.     parallel_for_each(
  80.         Matrix_C.extent, [=](index<2> idx) restrict(amp, cpu)
  81.     {
  82.         float sum = 0;
  83.         for (int j = 0; j < N; j++)
  84.         {
  85.             sum += Matrix_A(idx[0], j) * Matrix_B(j, idx[1]);
  86.         }
  87.         Matrix_C[idx] = sum;
  88.     });
  89.     Matrix_C.synchronize();
  90. }
  91.  
  92. void Math_AMP_2()
  93. {
  94.     array_view<float, 2> Matrix_A(N, N, A), Matrix_B(N, N, B), Matrix_C(N, N, C);
  95.     Matrix_C.discard_data();
  96.  
  97.     parallel_for_each(
  98.         Matrix_C.extent.tile <BLOCK, BLOCK>(), [=](tiled_index <BLOCK, BLOCK> idx) restrict(amp)
  99.     {
  100.         float sum = 0;
  101.         for (int j = 0; j < N; j += BLOCK)
  102.         {
  103.             tile_static int localA[BLOCK][BLOCK], localB[BLOCK][BLOCK];
  104.  
  105.             localA[idx.local[0]][idx.local[1]] = Matrix_A(idx.global[0], idx.local[1] + j);
  106.             localB[idx.local[0]][idx.local[1]] = Matrix_B(idx.local[0] + j, idx.global[1]);
  107.             idx.barrier.wait();
  108.  
  109.             for (int t = 0; t <BLOCK; ++t)
  110.             {
  111.                 sum += localA[idx.local[0]][t] * localB[t][idx.local[1]];
  112.             }
  113.             idx.barrier.wait();
  114.         }
  115.         Matrix_C[idx.global] = sum;
  116.     });
  117. }
  118.  
  119. int main()
  120. {
  121.    
  122.     createMatrix();
  123.    
  124.     accelerator chosen_one;
  125.     std::wcout << chosen_one.description << std::endl << std::endl;
  126.  
  127.     std::cout << "Matrix size: " << N << "*" << N << std::endl;
  128.     int a = rand() % N; // положение элемента матрицы, для теста.
  129.     int b = rand() % N;
  130.  
  131.     /*
  132.     //1 thread
  133.     unsigned int start_time_CPU = clock();
  134.     Math_CPU();
  135.     unsigned int end_time_CPU = clock();
  136.     unsigned int search_time_CPU = end_time_CPU - start_time_CPU;
  137.  
  138.     std::cout << "CPU 1 thread " << "time=" << search_time_CPU / 1000.0 << std::endl;
  139.     */
  140.  
  141.     //OpenMP
  142.     unsigned int start_time_MP = clock();
  143.     Math_OMP();
  144.     unsigned int end_time_MP = clock();
  145.     unsigned int search_time_MP = end_time_MP - start_time_MP;
  146.    
  147.     std::cout << cC[a][b] << " CPU_OpenMP " << "time=" << search_time_MP / 1000.0 << std::endl;
  148.  
  149.     //AMP
  150.     unsigned int start_time_AMP = clock();
  151.     Math_AMP();
  152.     unsigned int end_time_AMP = clock();
  153.     unsigned int search_time_AMP = end_time_AMP - start_time_AMP;
  154.     std::cout << C[a + b * N] << " GPU_AMP_ALL " << "time=" << search_time_AMP / 1000.0 << " boost=" << (float)search_time_MP / search_time_AMP << "x" << std::endl;
  155.  
  156.     //AMP v2
  157.     unsigned int start_time_AMP_2 = clock();
  158.     Math_AMP_2();
  159.     unsigned int end_time_AMP_2 = clock();
  160.     unsigned int search_time_AMP_2 = end_time_AMP_2 - start_time_AMP_2;
  161.     std::cout << C[a + b * N] << " GPU_AMP_block " << "time=" << search_time_AMP_2 / 1000.0 << " boost=" << (float)search_time_MP / search_time_AMP_2 << "x" << std::endl;
  162.  
  163.     std::cout << std::endl;
  164.     system("pause");
  165.     return 0;
  166. }
  167.  
  168.  
  169.  
  170.  
  171. /*
  172.  
  173. AMD Radeon (TM) R9 390 Series
  174.  
  175. Matrix size: 2048*2048
  176. 5.20176e+06 CPU_MAX time=13.226
  177. 5.20176e+06 GPU_AMP_ALL time=0.063 boost=209.937x
  178. 5.20176e+06 GPU_AMP_block time=0.055 boost=240.473x
  179.  
  180. */
Advertisement
Add Comment
Please, Sign In to add comment