Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <assert.h>
- #include <stdbool.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <omp.h>
- // Default is 16 for each dimension
- static int N;
- #define A(i,j) A[i*N+j] // A: N x N
- #define B(i,j) B[i*N+j] // B: N x N
- #define C(i,j) C[i*N+j] // C: N x N
- #define D(i,j) D[i*N+j] // D: N x N
- static double *A, *B, *C, *D;
- static void mm_base(double *A, double *B, double *C, int n)
- {
- int i, j, k;
- for (i = 0; i < n; i++) {
- for (k = 0; k < n; k++) {
- for (j = 0; j < n; j++) {
- C(i,j) += A(i,k) * B(k,j);
- }
- }
- }
- }
- #ifndef THRESHOLD
- #define THRESHOLD 4
- #endif
- static void mm_dac(double *A, double *B, double *C, int size)
- {
- if (size <= THRESHOLD) {
- mm_base(A, B, C, size);
- } else {
- // Submatrix offsets
- int s11 = 0; // Upper left
- int s12 = size/2; // Upper right
- int s21 = (size/2) * N; // Lower left
- int s22 = (size/2) * N + size/2; // Lower right
- // Recursive calls
- #pragma omp task
- mm_dac(A+s11, B+s11, C+s11, size/2); // C11 = A11 * B11
- #pragma omp task
- mm_dac(A+s11, B+s12, C+s12, size/2); // C12 = A11 * B12
- #pragma omp task
- mm_dac(A+s21, B+s11, C+s21, size/2); // C21 = A21 * B11
- mm_dac(A+s21, B+s12, C+s22, size/2); // C22 = A21 * B12
- #pragma omp taskwait
- #pragma omp task
- mm_dac(A+s12, B+s21, C+s11, size/2); // C11 += A12 * B21
- #pragma omp task
- mm_dac(A+s12, B+s22, C+s12, size/2); // C12 += A12 * B22
- #pragma omp task
- mm_dac(A+s22, B+s21, C+s21, size/2); // C21 += A22 * B21
- mm_dac(A+s22, B+s22, C+s22, size/2); // C22 += A22 * B22
- #pragma omp taskwait
- }
- }
- static bool is_power_of_two(int n)
- {
- return n != 0 && (n & (n-1)) == 0;
- }
- static bool equals(double *A, double *B)
- {
- int i, j;
- for (i = 0; i < N; i++) {
- for (j = 0; j < N; j++) {
- if (A(i,j) != B(i,j)) {
- return false;
- }
- }
- }
- return true;
- }
- int main(int argc, char *argv[])
- {
- int i, j;
- N = (argc > 1) ? atoi(argv[1]) : 16;
- assert(is_power_of_two(N));
- // Allocate matrices
- A = (double *)malloc(N * N * sizeof(double));
- B = (double *)malloc(N * N * sizeof(double));
- C = (double *)malloc(N * N * sizeof(double));
- D = (double *)malloc(N * N * sizeof(double));
- assert(A && B && C && D);
- // Setup matrix A
- for (i = 0; i < N; i++) {
- for (j = 0; j < N; j++) {
- A(i,j) = i * N + j + 1;
- }
- }
- // Setup matrix B
- for (i = 0; i < N; i++) {
- for (j = 0; j < N; j++) {
- B(i,j) = (j >= i) ? 1 : 0;
- }
- }
- // Setup matrices C and D
- for (i = 0; i < N; i++) {
- for (j = 0; j < N; j++) {
- C(i,j) = D(i,j) = 0.0;
- }
- }
- #pragma omp parallel
- #pragma omp master
- mm_dac(A, B, C, N);
- mm_base(A, B, D, N);
- assert(equals(C, D));
- free(A);
- free(B);
- free(C);
- free(D);
- return 0;
- }
Add Comment
Please, Sign In to add comment