#include #include #include #include #include #include #include #include #include #include #define Matrix_Size 1000 #define Cache_Line 8 double** Memory_Alloc(); void Free_Memory(double** matrix); void Rand_Matrix(double** matrix, bool dst); bool Check_Matrix(double** matrix_A, double** matrix_B); int Get_Cache_Size(int cache_level); int Get_Num_Elements(int cache_size); void Division_Matrix(int cache_size, int &block_size, int &block_size_bord); void Multiply_default(double** matrix_A, double** matrix_B, double** matrix_Res); void Block_Matrix_Multiply(double **matrix_A, double **matrix_B, double **matrix_Res, int sizeBlock, int block_size_bord); void Multiply_OpenMP(double** matrix_A, double** matrix_B, double** matrix_Res); int main() { unsigned __int64 time; int block_size_L3 = 0; int block_size_bord_L3 = 0; int block_size_L2 = 0; int block_size_bord_L2 = 0; int block_size_L1 = 0; int block_size_bord_L1 = 0; double** matrix_A = Memory_Alloc(); double** matrix_B = Memory_Alloc(); double** result_matrix = Memory_Alloc(); double** result_matrix2 = Memory_Alloc(); double** result_matrix3 = Memory_Alloc(); double** result_matrix4 = Memory_Alloc(); double** result_matrix5 = Memory_Alloc(); std::srand(std::time(0)); Rand_Matrix(matrix_A, 0); Rand_Matrix(matrix_B, 0); Rand_Matrix(result_matrix, 1); Rand_Matrix(result_matrix2, 1); Rand_Matrix(result_matrix3, 1); Rand_Matrix(result_matrix4, 1); Rand_Matrix(result_matrix5, 1); std::cout << "Matrix Size = " << Matrix_Size << "\n"; int cache_size_L3 = Get_Cache_Size(3); int cache_size_L2 = Get_Cache_Size(2); int cache_size_L1 = Get_Cache_Size(1); std::cout << "Cache L3 size = " << cache_size_L3 << "\n"; std::cout << "Cache L2 size = " << cache_size_L2 << "\n"; std::cout << "Cache L1 size = " << cache_size_L1 << "\n"; int max_matrix_size_L3 = Get_Num_Elements(cache_size_L3); int max_matrix_size_L2 = Get_Num_Elements(cache_size_L2); int max_matrix_size_L1 = Get_Num_Elements(cache_size_L1); std::cout << "Max matrix size L3 = " << max_matrix_size_L3 << "\n"; std::cout << "Max matrix size L2 = " << max_matrix_size_L2 << "\n"; std::cout << "Max matrix size L1 = " << max_matrix_size_L1 << "\n"; //Division_Matrix(max_matrix_size_L3, block_size_L3, block_size_bord_L3); //std::cout << "block size L3 = " << block_size_L3 << " block size bord L3 = " << block_size_bord_L3 << "\n"; Division_Matrix(max_matrix_size_L2, block_size_L2, block_size_bord_L2); std::cout << "block size L2 = " << block_size_L2 << " block size bord L2 = " << block_size_bord_L2 << "\n"; Division_Matrix(max_matrix_size_L1, block_size_L1, block_size_bord_L1); std::cout << "block size L1 = " << block_size_L1 << " block size bord L1 = " << block_size_bord_L1 << "\n"; time = __rdtsc(); Multiply_default(matrix_A, matrix_B, result_matrix); std::cout << "Time mux = \t\t" << __rdtsc() - time << " ticks.\n"; time = __rdtsc(); Multiply_OpenMP(matrix_A, matrix_B, result_matrix3); std::cout << "Time OpenMP = \t\t" << __rdtsc() - time << " ticks.\n"; //time = __rdtsc(); //Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix2, block_size_L3, block_size_bord_L3); //std::cout << "Time mux L3 = \t\t" << __rdtsc() - time << " ticks.\n"; time = __rdtsc(); Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix4, block_size_L2, block_size_bord_L2); std::cout << "Time mux L2 = \t\t" << __rdtsc() - time << " ticks.\n"; time = __rdtsc(); Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix5, block_size_L1, block_size_bord_L1); std::cout << "Time mux L1 = \t\t" << __rdtsc() - time << " ticks.\n"; std::cout << "Matrixes 'default' and 'Block' - "; Check_Matrix(result_matrix, result_matrix2); std::cout << "Matrixes 'Block_L3' and 'OpenMP' - "; Check_Matrix(result_matrix2, result_matrix3); std::cout << "Matrixes 'BlockL2' and 'OpenMP' - "; Check_Matrix(result_matrix3, result_matrix4); std::cout << "Matrixes 'BlockL1' and 'BlockL2' - "; Check_Matrix(result_matrix4, result_matrix5); Free_Memory(matrix_A); Free_Memory(matrix_B); Free_Memory(result_matrix); Free_Memory(result_matrix2); Free_Memory(result_matrix3); Free_Memory(result_matrix4); Free_Memory(result_matrix5); system("pause"); return 0; } double** Memory_Alloc() { double** matrix; try { matrix = new double*[Matrix_Size]; for (int i = 0; i < Matrix_Size; i++) { matrix[i] = new double[Matrix_Size]; } } catch (std::bad_alloc excp) { std::cout << "Can't allocate memory. " << std::endl; matrix = NULL; } return matrix; } void Free_Memory(double** matrix) { for (int i = 0; i < Matrix_Size; i++) { delete[] matrix[i]; } delete[] matrix; } void Rand_Matrix(double** matrix, bool dst) { std::srand(std::time(0)); for (int i = 0; i < Matrix_Size; i++) { for (int j = 0; j < Matrix_Size; j++) { if (dst) { matrix[i][j] = 0; } else { matrix[i][j] = std::rand() % 9; } } } } bool Check_Matrix(double** matrix_A, double** matrix_B) { for (int i = 0; i < Matrix_Size; i++) { for (int j = 0; j < Matrix_Size; j++) { if (matrix_A[i][j] != matrix_B[i][j]) { std::cout << "Not equal\n"; return false; } } } std::cout << "Equal\n"; return true; } int Get_Cache_Size(int cache_level) { int cache_size = 0; DWORD buffer_size = 0; SYSTEM_LOGICAL_PROCESSOR_INFORMATION* buffer = NULL; GetLogicalProcessorInformation(0, &buffer_size); try { buffer = new SYSTEM_LOGICAL_PROCESSOR_INFORMATION[buffer_size]; } catch (std::bad_alloc excp) { std::cout << "Can't allocate memory for L3buffer. " << std::endl; buffer = NULL; return 0; } GetLogicalProcessorInformation(&buffer[0], &buffer_size); for (DWORD i = 0; i < buffer_size; i++) { if (buffer[i].Cache.Level == 3 && cache_level == 3) { cache_size = buffer[i].Cache.Size; break; } if (buffer[i].Cache.Level == 2 && cache_level == 2) { cache_size = buffer[i].Cache.Size; break; } if (buffer[i].Cache.Level == 1 && cache_level == 1) { cache_size = buffer[i].Cache.Size; break; } } delete[] buffer; return cache_size; } int Get_Num_Elements(int cache_size) { int memory = (cache_size * 0.8 / 3); // размер кэша в байтах, процент заполнения кэша матрицай, количество равных частей кэша (3 матрицы в нашем случае) int double_num = memory / sizeof(double); // количество элементов типа double double_num = sqrt(double_num); // количество строк/столбцов в матрице while (double_num % Cache_Line != 0) { // кратность 64 байтам double_num--; } return double_num; } void Division_Matrix(int cache_size, int &block_sizee, int &block_size_bord) { for (int i = cache_size; i > Cache_Line; i--) { if ((i % Cache_Line == 0) && (Matrix_Size % i == 0)) { block_sizee = i; block_size_bord = i; return; } } block_sizee = cache_size; block_size_bord = Matrix_Size; while (block_size_bord > block_sizee) { block_size_bord -= block_sizee; } return; } void Multiply_default(double** matrix_A, double** matrix_B, double** matrix_Res) { for (int i = 0; i < Matrix_Size; i++) { for (int j = 0; j < Matrix_Size; j++) { for (int k = 0; k < Matrix_Size; k++) { matrix_Res[i][j] += matrix_A[i][k] * matrix_B[k][j]; } } } } void Block_Matrix_Multiply(double **matrix_A, double **matrix_B, double **matrix_Res, int sizeBlock, int block_size_bord) { int count = Matrix_Size / sizeBlock; if (Matrix_Size % sizeBlock != 0) { count++; } for (int i = 0, sizeIBlock = sizeBlock; i < count; i++) { if (i == count - 1) sizeIBlock = block_size_bord; for (int j = 0, sizeJBlock = sizeBlock; j < count; j++) { if (j == count - 1) sizeJBlock = block_size_bord; for (int k = 0, sizeKBlock = sizeBlock; k < count; k++) { if (k == count - 1) { sizeKBlock = block_size_bord; } for (int l = 0; l < sizeIBlock; l++) { for (int n = 0; n < sizeKBlock; n++) { for (int m = 0; m < sizeJBlock; m++) { matrix_Res[i * sizeBlock + l][j * sizeBlock + m] += matrix_A[i * sizeBlock + l][k * sizeBlock + n] * matrix_B[k * sizeBlock + n][j * sizeBlock + m]; } } } } } } } void Multiply_OpenMP(double** matrix_A, double** matrix_B, double** matrix_Res) { #pragma omp parallel for for (int i = 0; i < Matrix_Size; i++) { for (int k = 0; k < Matrix_Size; k++) { for (int j = 0; j < Matrix_Size; j++) { matrix_Res[i][j] += matrix_A[i][k] * matrix_B[k][j]; } } } }