#include #include #include #include #include #include #include using namespace std; HANDLE hOut; float **matrixA, **matrixB, **matrixC, **correctResult; int matrixSize, threadsNum, seed; enum algorithm { sequential, parallel_1, parallel_2, parallel_3 }; enum order { KJI, IKJ }; void initMatrices(int matrixSize, int seed) { srand(seed); for (int i = 0; i < matrixSize; i++) for (int j = 0; j < matrixSize; j++) { matrixA[i][j] = rand() % 100; matrixB[i][j] = rand() % 100; matrixC[i][j] = 0; } } void printMatrices() { for (int i = 0; i < matrixSize; i++) { for (int j = 0; j < matrixSize; j++) { printf("%f ", matrixA[i][j]); } printf("\n"); }printf("\n"); for (int i = 0; i < matrixSize; i++) { for (int j = 0; j < matrixSize; j++) { printf("%f ", matrixB[i][j]); } printf("\n"); }printf("\n"); for (int i = 0; i < matrixSize; i++) { for (int j = 0; j < matrixSize; j++) { printf("%f ", matrixC[i][j]); } printf("\n"); }printf("\n"); } void sequential_algorithm_KJI () { for (int k = 0; k < matrixSize; k++) { for (int j = 0; j < matrixSize; j++) { float tmp = matrixB[k][j]; for (int i = 0; i < matrixSize; i++) matrixC[i][j] += matrixA[i][k] * tmp; } } } void parallel_algorithm1_KJI() { #pragma omp parallel num_threads(threadsNum) { #pragma omp for for (int k = 0; k < matrixSize; k++) { for (int j = 0; j < matrixSize; j++) { float tmp = matrixB[k][j]; for (int i = 0; i < matrixSize; i++) #pragma omp atomic matrixC[i][j] += matrixA[i][k] * tmp; } } } } void parallel_algorithm2_KJI() { #pragma omp parallel num_threads(threadsNum) { for (int k = 0; k < matrixSize; k++) { #pragma omp for for (int j = 0; j < matrixSize; j++) { float tmp = matrixB[k][j]; for (int i = 0; i < matrixSize; i++) matrixC[i][j] += matrixA[i][k] * tmp; } } } } void parallel_algorithm3_KJI() { #pragma omp parallel num_threads(threadsNum) { for (int k = 0; k < matrixSize; k++) { for (int j = 0; j < matrixSize; j++) { float tmp = matrixB[k][j]; #pragma omp for for (int i = 0; i < matrixSize; i++) matrixC[i][j] += matrixA[i][k] * tmp; } } } } void sequential_algorithm_IKJ() { for (int i = 0; i < matrixSize; i++) for (int k = 0; k < matrixSize; k++) for (int j = 0; j < matrixSize; j++) matrixC[i][j] += matrixA[i][k] * matrixB[k][j]; } void parallel_algorithm1_IKJ() { #pragma omp parallel for for (int i = 0; i < matrixSize; i++) for (int k = 0; k < matrixSize; k++) for (int j = 0; j < matrixSize; j++) matrixC[i][j] += matrixA[i][k] * matrixB[k][j]; } void parallel_algorithm2_IKJ() { for (int i = 0; i < matrixSize; i++) { #pragma omp parallel for for (int k = 0; k < matrixSize; k++) for (int j = 0; j < matrixSize; j++) #pragma omp atomic matrixC[i][j] += matrixA[i][k] * matrixB[k][j]; } } void parallel_algorithm3_IKJ() { for (int i = 0; i < matrixSize; i++) for (int k = 0; k < matrixSize; k++) { #pragma omp parallel for for (int j = 0; j < matrixSize; j++) matrixC[i][j] += matrixA[i][k] * matrixB[k][j]; } } void checkResult(int matrixSize, float **matrix) { for (int i = 0; i < matrixSize; i++) for (int j = 0; j < matrixSize; j++) if (matrix[i][j] != correctResult[i][j]) { SetConsoleTextAttribute(hOut, 12); printf("\tWrong!\n"); SetConsoleTextAttribute(hOut, 7); return; } SetConsoleTextAttribute(hOut, 10); printf("\tCorrect!\n"); SetConsoleTextAttribute(hOut, 7); } void startTest(algorithm algorytm, order kolejnosc) { initMatrices(matrixSize, seed); double start, stop; switch (algorytm) { case sequential: switch (kolejnosc) { case KJI: start = (double)clock() / CLK_TCK; sequential_algorithm_KJI(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 7); printf("SEKWENCYJNY\t(KJI)\t"); break; case IKJ: start = (double)clock() / CLK_TCK; sequential_algorithm_IKJ(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 8); printf("SEKWENCYJNY\t(IKJ)\t"); break; } break; case parallel_1: switch (kolejnosc) { case KJI: start = (double)clock() / CLK_TCK; parallel_algorithm1_KJI(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 7); printf("ROWNOLEGLY 1\t(KJI)\t"); break; case IKJ: start = (double)clock() / CLK_TCK; parallel_algorithm1_KJI(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 8); printf("ROWNOLEGLY 1\t(IKJ)\t"); break; } break; case parallel_2: switch (kolejnosc) { case KJI: start = (double)clock() / CLK_TCK; parallel_algorithm2_KJI(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 7); printf("ROWNOLEGLY 2\t(KJI)\t"); break; case IKJ: start = (double)clock() / CLK_TCK; parallel_algorithm2_IKJ(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 8); printf("ROWNOLEGLY 2\t(IKJ)\t"); break; } break; case parallel_3: switch (kolejnosc) { case KJI: start = (double)clock() / CLK_TCK; parallel_algorithm3_KJI(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 7); printf("ROWNOLEGLY 3\t(KJI)\t"); break; case IKJ: start = (double)clock() / CLK_TCK; parallel_algorithm3_IKJ(); stop = (double)clock() / CLK_TCK; SetConsoleTextAttribute(hOut, 8); printf("ROWNOLEGLY 3\t(IKJ)\t"); break; } break; default: printf("Niepoprawna nazwa algorytmu!"); return; } printf("%8.4f sec", stop - start); SetConsoleTextAttribute(hOut, 7); checkResult(matrixSize, matrixC); } void newInstance(int tNum, int mSize, int sd) { threadsNum = tNum; matrixSize = mSize; seed = sd; matrixA = new float*[matrixSize]; matrixB = new float*[matrixSize]; matrixC = new float*[matrixSize]; correctResult = new float*[matrixSize]; for (int i = 0; i < matrixSize; ++i) { matrixA[i] = new float[matrixSize]; matrixB[i] = new float[matrixSize]; matrixC[i] = new float[matrixSize]; correctResult[i] = new float[matrixSize]; } hOut = GetStdHandle(STD_OUTPUT_HANDLE); SetConsoleTextAttribute(hOut, 15); printf("\n\n\n\n\nTrwa przygotowanie nowej instancji...\n\n\n"); SetConsoleTextAttribute(hOut, 7); initMatrices(matrixSize, seed); sequential_algorithm_KJI(); for (int i = 0; i < matrixSize; i++) for (int j = 0; j < matrixSize; j++) correctResult[i][j] = matrixC[i][j]; printf("[ThreadNum: %d\t MSize: %d\tSeed: %d]\n\n", threadsNum, matrixSize, seed); } int main(int argc, char* argv[]) { for (int i = 100; i < 3000; i+=100) { newInstance(8, i, 100); startTest(sequential, KJI); startTest(parallel_1, KJI); startTest(parallel_2, KJI); startTest(parallel_3, KJI); startTest(sequential, IKJ); startTest(parallel_1, IKJ); startTest(parallel_2, IKJ); startTest(parallel_3, IKJ); } _getch(); return 1; }