Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <malloc.h>
- #include <math.h>
- #include <xmmintrin.h>
- #include <time.h>
- #include <omp.h>
- #include <iostream>
- #include <ctime>
- #include <iostream>
- #include <Windows.h>
- #define Matrix_Size 1000
- #define Cache_Line 8
- double** Memory_Alloc();
- void Free_Memory(double** matrix);
- void Rand_Matrix(double** matrix, bool dst);
- bool Check_Matrix(double** matrix_A, double** matrix_B);
- int Get_Cache_Size(int cache_level);
- int Get_Num_Elements(int cache_size);
- void Division_Matrix(int cache_size, int &block_size, int &block_size_bord);
- void Multiply_default(double** matrix_A, double** matrix_B, double** matrix_Res);
- void Block_Matrix_Multiply(double **matrix_A, double **matrix_B, double **matrix_Res, int sizeBlock, int block_size_bord);
- void Multiply_OpenMP(double** matrix_A, double** matrix_B, double** matrix_Res);
- int main() {
- unsigned __int64 time;
- int block_size_L3 = 0;
- int block_size_bord_L3 = 0;
- int block_size_L2 = 0;
- int block_size_bord_L2 = 0;
- int block_size_L1 = 0;
- int block_size_bord_L1 = 0;
- double** matrix_A = Memory_Alloc();
- double** matrix_B = Memory_Alloc();
- double** result_matrix = Memory_Alloc();
- double** result_matrix2 = Memory_Alloc();
- double** result_matrix3 = Memory_Alloc();
- double** result_matrix4 = Memory_Alloc();
- double** result_matrix5 = Memory_Alloc();
- std::srand(std::time(0));
- Rand_Matrix(matrix_A, 0);
- Rand_Matrix(matrix_B, 0);
- Rand_Matrix(result_matrix, 1);
- Rand_Matrix(result_matrix2, 1);
- Rand_Matrix(result_matrix3, 1);
- Rand_Matrix(result_matrix4, 1);
- Rand_Matrix(result_matrix5, 1);
- std::cout << "Matrix Size = " << Matrix_Size << "\n";
- int cache_size_L3 = Get_Cache_Size(3);
- int cache_size_L2 = Get_Cache_Size(2);
- int cache_size_L1 = Get_Cache_Size(1);
- std::cout << "Cache L3 size = " << cache_size_L3 << "\n";
- std::cout << "Cache L2 size = " << cache_size_L2 << "\n";
- std::cout << "Cache L1 size = " << cache_size_L1 << "\n";
- int max_matrix_size_L3 = Get_Num_Elements(cache_size_L3);
- int max_matrix_size_L2 = Get_Num_Elements(cache_size_L2);
- int max_matrix_size_L1 = Get_Num_Elements(cache_size_L1);
- std::cout << "Max matrix size L3 = " << max_matrix_size_L3 << "\n";
- std::cout << "Max matrix size L2 = " << max_matrix_size_L2 << "\n";
- std::cout << "Max matrix size L1 = " << max_matrix_size_L1 << "\n";
- //Division_Matrix(max_matrix_size_L3, block_size_L3, block_size_bord_L3);
- //std::cout << "block size L3 = " << block_size_L3 << " block size bord L3 = " << block_size_bord_L3 << "\n";
- Division_Matrix(max_matrix_size_L2, block_size_L2, block_size_bord_L2);
- std::cout << "block size L2 = " << block_size_L2 << " block size bord L2 = " << block_size_bord_L2 << "\n";
- Division_Matrix(max_matrix_size_L1, block_size_L1, block_size_bord_L1);
- std::cout << "block size L1 = " << block_size_L1 << " block size bord L1 = " << block_size_bord_L1 << "\n";
- time = __rdtsc();
- Multiply_default(matrix_A, matrix_B, result_matrix);
- std::cout << "Time mux = \t\t" << __rdtsc() - time << " ticks.\n";
- time = __rdtsc();
- Multiply_OpenMP(matrix_A, matrix_B, result_matrix3);
- std::cout << "Time OpenMP = \t\t" << __rdtsc() - time << " ticks.\n";
- //time = __rdtsc();
- //Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix2, block_size_L3, block_size_bord_L3);
- //std::cout << "Time mux L3 = \t\t" << __rdtsc() - time << " ticks.\n";
- time = __rdtsc();
- Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix4, block_size_L2, block_size_bord_L2);
- std::cout << "Time mux L2 = \t\t" << __rdtsc() - time << " ticks.\n";
- time = __rdtsc();
- Block_Matrix_Multiply(matrix_A, matrix_B, result_matrix5, block_size_L1, block_size_bord_L1);
- std::cout << "Time mux L1 = \t\t" << __rdtsc() - time << " ticks.\n";
- std::cout << "Matrixes 'default' and 'Block' - ";
- Check_Matrix(result_matrix, result_matrix2);
- std::cout << "Matrixes 'Block_L3' and 'OpenMP' - ";
- Check_Matrix(result_matrix2, result_matrix3);
- std::cout << "Matrixes 'BlockL2' and 'OpenMP' - ";
- Check_Matrix(result_matrix3, result_matrix4);
- std::cout << "Matrixes 'BlockL1' and 'BlockL2' - ";
- Check_Matrix(result_matrix4, result_matrix5);
- Free_Memory(matrix_A);
- Free_Memory(matrix_B);
- Free_Memory(result_matrix);
- Free_Memory(result_matrix2);
- Free_Memory(result_matrix3);
- Free_Memory(result_matrix4);
- Free_Memory(result_matrix5);
- system("pause");
- return 0;
- }
- double** Memory_Alloc() {
- double** matrix;
- try {
- matrix = new double*[Matrix_Size];
- for (int i = 0; i < Matrix_Size; i++) {
- matrix[i] = new double[Matrix_Size];
- }
- }
- catch (std::bad_alloc excp) {
- std::cout << "Can't allocate memory. " << std::endl;
- matrix = NULL;
- }
- return matrix;
- }
- void Free_Memory(double** matrix) {
- for (int i = 0; i < Matrix_Size; i++) {
- delete[] matrix[i];
- }
- delete[] matrix;
- }
- void Rand_Matrix(double** matrix, bool dst) {
- std::srand(std::time(0));
- for (int i = 0; i < Matrix_Size; i++) {
- for (int j = 0; j < Matrix_Size; j++) {
- if (dst) {
- matrix[i][j] = 0;
- }
- else {
- matrix[i][j] = std::rand() % 9;
- }
- }
- }
- }
- bool Check_Matrix(double** matrix_A, double** matrix_B) {
- for (int i = 0; i < Matrix_Size; i++) {
- for (int j = 0; j < Matrix_Size; j++) {
- if (matrix_A[i][j] != matrix_B[i][j]) {
- std::cout << "Not equal\n";
- return false;
- }
- }
- }
- std::cout << "Equal\n";
- return true;
- }
- int Get_Cache_Size(int cache_level) {
- int cache_size = 0;
- DWORD buffer_size = 0;
- SYSTEM_LOGICAL_PROCESSOR_INFORMATION* buffer = NULL;
- GetLogicalProcessorInformation(0, &buffer_size);
- try {
- buffer = new SYSTEM_LOGICAL_PROCESSOR_INFORMATION[buffer_size];
- }
- catch (std::bad_alloc excp) {
- std::cout << "Can't allocate memory for L3buffer. " << std::endl;
- buffer = NULL;
- return 0;
- }
- GetLogicalProcessorInformation(&buffer[0], &buffer_size);
- for (DWORD i = 0; i < buffer_size; i++) {
- if (buffer[i].Cache.Level == 3 && cache_level == 3) {
- cache_size = buffer[i].Cache.Size;
- break;
- }
- if (buffer[i].Cache.Level == 2 && cache_level == 2) {
- cache_size = buffer[i].Cache.Size;
- break;
- }
- if (buffer[i].Cache.Level == 1 && cache_level == 1) {
- cache_size = buffer[i].Cache.Size;
- break;
- }
- }
- delete[] buffer;
- return cache_size;
- }
- int Get_Num_Elements(int cache_size) {
- int memory = (cache_size * 0.8 / 3); // размер кэша в байтах, процент заполнения кэша матрицай, количество равных частей кэша (3 матрицы в нашем случае)
- int double_num = memory / sizeof(double); // количество элементов типа double
- double_num = sqrt(double_num); // количество строк/столбцов в матрице
- while (double_num % Cache_Line != 0) { // кратность 64 байтам
- double_num--;
- }
- return double_num;
- }
- void Division_Matrix(int cache_size, int &block_sizee, int &block_size_bord) {
- for (int i = cache_size; i > Cache_Line; i--) {
- if ((i % Cache_Line == 0) && (Matrix_Size % i == 0)) {
- block_sizee = i;
- block_size_bord = i;
- return;
- }
- }
- block_sizee = cache_size;
- block_size_bord = Matrix_Size;
- while (block_size_bord > block_sizee) {
- block_size_bord -= block_sizee;
- }
- return;
- }
- void Multiply_default(double** matrix_A, double** matrix_B, double** matrix_Res) {
- for (int i = 0; i < Matrix_Size; i++) {
- for (int j = 0; j < Matrix_Size; j++) {
- for (int k = 0; k < Matrix_Size; k++) {
- matrix_Res[i][j] += matrix_A[i][k] * matrix_B[k][j];
- }
- }
- }
- }
- void Block_Matrix_Multiply(double **matrix_A, double **matrix_B, double **matrix_Res, int sizeBlock, int block_size_bord) {
- int count = Matrix_Size / sizeBlock;
- if (Matrix_Size % sizeBlock != 0) {
- count++;
- }
- for (int i = 0, sizeIBlock = sizeBlock; i < count; i++) {
- if (i == count - 1)
- sizeIBlock = block_size_bord;
- for (int j = 0, sizeJBlock = sizeBlock; j < count; j++) {
- if (j == count - 1)
- sizeJBlock = block_size_bord;
- for (int k = 0, sizeKBlock = sizeBlock; k < count; k++) {
- if (k == count - 1) {
- sizeKBlock = block_size_bord;
- }
- for (int l = 0; l < sizeIBlock; l++) {
- for (int n = 0; n < sizeKBlock; n++) {
- for (int m = 0; m < sizeJBlock; m++) {
- matrix_Res[i * sizeBlock + l][j * sizeBlock + m] +=
- matrix_A[i * sizeBlock + l][k * sizeBlock + n] *
- matrix_B[k * sizeBlock + n][j * sizeBlock + m];
- }
- }
- }
- }
- }
- }
- }
- void Multiply_OpenMP(double** matrix_A, double** matrix_B, double** matrix_Res) {
- #pragma omp parallel for
- for (int i = 0; i < Matrix_Size; i++) {
- for (int k = 0; k < Matrix_Size; k++) {
- for (int j = 0; j < Matrix_Size; j++) {
- matrix_Res[i][j] += matrix_A[i][k] * matrix_B[k][j];
- }
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement