Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 8.79 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <malloc.h>
  3. #include <math.h>
  4. #include <xmmintrin.h>
  5. #include <time.h>
  6. #include <omp.h>
  7. #include <iostream>
  8. #include <ctime>
  9. #include <iostream>
  10. #include <Windows.h>
  11.  
  12. #define Matrix_Size  1000   // number of rows and of columns of every matrix used in this program
  13. #define Cache_Line 8       // granularity (in matrix elements) that block sizes are aligned to; presumably 8 doubles = one 64-byte cache line - confirm
  14.  
  15. double** Memory_Alloc();   // allocates a Matrix_Size x Matrix_Size matrix as an array of row pointers; returns NULL on allocation failure
  16. void Free_Memory(double** matrix);   // releases every row and the row-pointer array of a matrix from Memory_Alloc
  17. void Rand_Matrix(double** matrix, bool dst);   // dst == true: fill with zeros; dst == false: fill with std::rand() % 9
  18. bool Check_Matrix(double** matrix_A, double** matrix_B);   // element-wise exact comparison; prints "Equal" / "Not equal"
  19. int Get_Cache_Size(int cache_level);   // size reported by GetLogicalProcessorInformation for cache level 1, 2 or 3; 0 if none found
  20. int Get_Num_Elements(int cache_size);   // side length of a square double block sized from cache_size, rounded down to a multiple of Cache_Line
  21. void Division_Matrix(int cache_size, int &block_size, int &block_size_bord);   // picks the block side and the side of the last (border) block
  22. void Multiply_default(double** matrix_A, double** matrix_B, double** matrix_Res);   // triple loop in i-j-k order, accumulating into matrix_Res
  23. void Block_Matrix_Multiply(double **matrix_A, double **matrix_B, double **matrix_Res, int sizeBlock, int block_size_bord);   // blocked product, accumulating into matrix_Res
  24. void Multiply_OpenMP(double** matrix_A, double** matrix_B, double** matrix_Res);   // i-k-j product with the outer loop under "#pragma omp parallel for"
  25.  
  26. int main() {
  27.     unsigned __int64 time;
  28.     int block_size_L3 = 0;
  29.     int block_size_bord_L3 = 0;
  30.     int block_size_L2 = 0;
  31.     int block_size_bord_L2 = 0;
  32.     int block_size_L1 = 0;
  33.     int block_size_bord_L1 = 0;
  34.  
  35.     double** matrix_A = Memory_Alloc();
  36.     double** matrix_B = Memory_Alloc();
  37.     double** result_matrix = Memory_Alloc();
  38.     double** result_matrix2 = Memory_Alloc();
  39.     double** result_matrix3 = Memory_Alloc();
  40.     double** result_matrix4 = Memory_Alloc();
  41.     double** result_matrix5 = Memory_Alloc();
  42.  
  43.     std::srand(std::time(0));
  44.     Rand_Matrix(matrix_A, 0);
  45.     Rand_Matrix(matrix_B, 0);
  46.     Rand_Matrix(result_matrix, 1);
  47.     Rand_Matrix(result_matrix2, 1);
  48.     Rand_Matrix(result_matrix3, 1);
  49.     Rand_Matrix(result_matrix4, 1);
  50.     Rand_Matrix(result_matrix5, 1);
  51.  
  52.     std::cout << "Matrix Size = " << Matrix_Size << "\n";
  53.  
  54.     int cache_size_L3 = Get_Cache_Size(3);
  55.     int cache_size_L2 = Get_Cache_Size(2);
  56.     int cache_size_L1 = Get_Cache_Size(1);
  57.     std::cout << "Cache L3 size = " << cache_size_L3 << "\n";
  58.     std::cout << "Cache L2 size = " << cache_size_L2 << "\n";
  59.     std::cout << "Cache L1 size = " << cache_size_L1 << "\n";
  60.  
  61.     int max_matrix_size_L3 = Get_Num_Elements(cache_size_L3);
  62.     int max_matrix_size_L2 = Get_Num_Elements(cache_size_L2);
  63.     int max_matrix_size_L1 = Get_Num_Elements(cache_size_L1);
  64.     std::cout << "Max matrix size L3 = " << max_matrix_size_L3 << "\n";
  65.     std::cout << "Max matrix size L2 = " << max_matrix_size_L2 << "\n";
  66.     std::cout << "Max matrix size L1 = " << max_matrix_size_L1 << "\n";
  67.  
  68.     //Division_Matrix(max_matrix_size_L3, block_size_L3, block_size_bord_L3);
  69.     //std::cout << "block size L3 = " << block_size_L3 << " block size bord L3 = " << block_size_bord_L3 << "\n";
  70.     Division_Matrix(max_matrix_size_L2, block_size_L2, block_size_bord_L2);
  71.     std::cout << "block size L2 = " << block_size_L2 << " block size bord L2 = " << block_size_bord_L2 << "\n";
  72.     Division_Matrix(max_matrix_size_L1, block_size_L1, block_size_bord_L1);
  73.     std::cout << "block size L1 = " << block_size_L1 << " block size bord L1 = " << block_size_bord_L1 << "\n";
  74.  
  75.     time = __rdtsc();
  76.     Multiply_default(matrix_A, matrix_B, result_matrix);
  77.     std::cout << "Time mux = \t\t" << __rdtsc() - time << " ticks.\n";
  78.  
  79.     time = __rdtsc();
  80.     Multiply_OpenMP(matrix_A, matrix_B, result_matrix3);
  81.     std::cout << "Time OpenMP = \t\t" << __rdtsc() - time << " ticks.\n";
  82.  
  83.     //time = __rdtsc();
  84.     //Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix2, block_size_L3, block_size_bord_L3);
  85.     //std::cout << "Time mux L3 = \t\t" << __rdtsc() - time << " ticks.\n";
  86.  
  87.     time = __rdtsc();
  88.     Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix4, block_size_L2, block_size_bord_L2);
  89.     std::cout << "Time mux L2 = \t\t" << __rdtsc() - time << " ticks.\n";
  90.  
  91.     time = __rdtsc();
  92.     Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix5, block_size_L1, block_size_bord_L1);
  93.     std::cout << "Time mux L1 = \t\t" << __rdtsc() - time << " ticks.\n";
  94.  
  95.     std::cout << "Matrixes 'default' and 'Block' - ";
  96.     Check_Matrix(result_matrix, result_matrix2);
  97.     std::cout << "Matrixes 'Block_L3' and 'OpenMP' - ";
  98.     Check_Matrix(result_matrix2, result_matrix3);
  99.     std::cout << "Matrixes 'BlockL2' and 'OpenMP' - ";
  100.     Check_Matrix(result_matrix3, result_matrix4);
  101.     std::cout << "Matrixes 'BlockL1' and 'BlockL2' - ";
  102.     Check_Matrix(result_matrix4, result_matrix5);
  103.  
  104.     Free_Memory(matrix_A);
  105.     Free_Memory(matrix_B);
  106.     Free_Memory(result_matrix);
  107.     Free_Memory(result_matrix2);
  108.     Free_Memory(result_matrix3);
  109.     Free_Memory(result_matrix4);
  110.     Free_Memory(result_matrix5);
  111.  
  112.     system("pause");
  113.     return 0;
  114. }
  115.  
  116. double** Memory_Alloc() {
  117.     double** matrix;
  118.  
  119.     try {
  120.         matrix = new double*[Matrix_Size];
  121.         for (int i = 0; i < Matrix_Size; i++) {
  122.             matrix[i] = new double[Matrix_Size];
  123.         }
  124.     }
  125.     catch (std::bad_alloc excp) {
  126.         std::cout << "Can't allocate memory. " << std::endl;
  127.         matrix = NULL;
  128.     }
  129.  
  130.     return matrix;
  131. }
  132.  
  133. void Free_Memory(double** matrix) {
  134.  
  135.     for (int i = 0; i < Matrix_Size; i++) {
  136.         delete[] matrix[i];
  137.     }
  138.     delete[] matrix;
  139. }
  140.  
  141. void Rand_Matrix(double** matrix, bool dst) {
  142.  
  143.     std::srand(std::time(0));
  144.  
  145.     for (int i = 0; i < Matrix_Size; i++) {
  146.         for (int j = 0; j < Matrix_Size; j++) {
  147.             if (dst) {
  148.                 matrix[i][j] = 0;
  149.             }
  150.             else {
  151.                 matrix[i][j] = std::rand() % 9;
  152.             }
  153.         }
  154.     }
  155. }
  156.  
  157. bool Check_Matrix(double** matrix_A, double** matrix_B) {
  158.  
  159.     for (int i = 0; i < Matrix_Size; i++) {
  160.         for (int j = 0; j < Matrix_Size; j++) {
  161.             if (matrix_A[i][j] != matrix_B[i][j]) {
  162.                 std::cout << "Not equal\n";
  163.                 return false;
  164.             }
  165.         }
  166.     }
  167.     std::cout << "Equal\n";
  168.     return true;
  169. }
  170.  
  171. int Get_Cache_Size(int cache_level) {
  172.  
  173.     int cache_size = 0;
  174.     DWORD buffer_size = 0;
  175.     SYSTEM_LOGICAL_PROCESSOR_INFORMATION* buffer = NULL;
  176.  
  177.     GetLogicalProcessorInformation(0, &buffer_size);
  178.     try {
  179.         buffer = new SYSTEM_LOGICAL_PROCESSOR_INFORMATION[buffer_size];
  180.     }
  181.     catch (std::bad_alloc excp) {
  182.         std::cout << "Can't allocate memory for L3buffer. " << std::endl;
  183.         buffer = NULL;
  184.         return 0;
  185.     }
  186.  
  187.     GetLogicalProcessorInformation(&buffer[0], &buffer_size);
  188.  
  189.     for (DWORD i = 0; i < buffer_size; i++) {
  190.         if (buffer[i].Cache.Level == 3 && cache_level == 3) {
  191.             cache_size = buffer[i].Cache.Size;
  192.             break;
  193.         }
  194.         if (buffer[i].Cache.Level == 2 && cache_level == 2) {
  195.             cache_size = buffer[i].Cache.Size;
  196.             break;
  197.         }
  198.         if (buffer[i].Cache.Level == 1 && cache_level == 1) {
  199.             cache_size = buffer[i].Cache.Size;
  200.             break;
  201.         }
  202.     }
  203.  
  204.     delete[] buffer;
  205.     return cache_size;
  206. }
  207.  
  208. int Get_Num_Elements(int cache_size) {
  209.  
  210.     int memory = (cache_size * 0.8 / 3);        // размер кэша в байтах, процент заполнения кэша матрицай, количество равных частей кэша (3 матрицы в нашем случае)
  211.     int double_num = memory / sizeof(double);   // количество элементов типа double
  212.     double_num = sqrt(double_num);              // количество строк/столбцов в матрице
  213.  
  214.     while (double_num % Cache_Line != 0) {      // кратность 64 байтам
  215.         double_num--;
  216.     }
  217.  
  218.     return double_num;
  219. }
  220.  
  221. void Division_Matrix(int cache_size, int &block_sizee, int &block_size_bord) {
  222.  
  223.     for (int i = cache_size; i > Cache_Line; i--) {
  224.         if ((i % Cache_Line == 0) && (Matrix_Size % i == 0)) {
  225.             block_sizee = i;
  226.             block_size_bord = i;
  227.             return;
  228.         }
  229.     }
  230.  
  231.     block_sizee = cache_size;
  232.     block_size_bord = Matrix_Size;
  233.  
  234.     while (block_size_bord > block_sizee) {
  235.         block_size_bord -= block_sizee;
  236.     }
  237.  
  238.     return;
  239. }
  240.  
  241. void Multiply_default(double** matrix_A, double** matrix_B, double** matrix_Res) {
  242.  
  243.     for (int i = 0; i < Matrix_Size; i++) {
  244.         for (int j = 0; j < Matrix_Size; j++) {
  245.             for (int k = 0; k < Matrix_Size; k++) {
  246.                 matrix_Res[i][j] += matrix_A[i][k] * matrix_B[k][j];
  247.             }
  248.         }
  249.     }
  250. }
  251.  
  252. void Block_Matrix_Multiply(double **matrix_A, double **matrix_B, double **matrix_Res, int sizeBlock, int block_size_bord) {
  253.  
  254.     int count = Matrix_Size / sizeBlock;
  255.     if (Matrix_Size % sizeBlock != 0) {
  256.         count++;
  257.     }
  258.  
  259.     for (int i = 0, sizeIBlock = sizeBlock; i < count; i++) {
  260.         if (i == count - 1)
  261.             sizeIBlock = block_size_bord;
  262.         for (int j = 0, sizeJBlock = sizeBlock; j < count; j++) {
  263.             if (j == count - 1)
  264.                 sizeJBlock = block_size_bord;
  265.             for (int k = 0, sizeKBlock = sizeBlock; k < count; k++) {
  266.                 if (k == count - 1) {
  267.                     sizeKBlock = block_size_bord;
  268.                 }
  269.  
  270.                 for (int l = 0; l < sizeIBlock; l++) {
  271.                     for (int n = 0; n < sizeKBlock; n++) {
  272.                         for (int m = 0; m < sizeJBlock; m++) {
  273.                             matrix_Res[i * sizeBlock + l][j * sizeBlock + m] +=
  274.                                 matrix_A[i * sizeBlock + l][k * sizeBlock + n] *
  275.                                 matrix_B[k * sizeBlock + n][j * sizeBlock + m];
  276.                         }
  277.                     }
  278.                 }
  279.             }
  280.         }
  281.     }
  282. }
  283.  
  284. void Multiply_OpenMP(double** matrix_A, double** matrix_B, double** matrix_Res) {
  285.  
  286. #pragma omp parallel for
  287.     for (int i = 0; i < Matrix_Size; i++) {
  288.         for (int k = 0; k < Matrix_Size; k++) {
  289.             for (int j = 0; j < Matrix_Size; j++) {
  290.                 matrix_Res[i][j] += matrix_A[i][k] * matrix_B[k][j];
  291.             }
  292.         }
  293.     }
  294. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement