Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * sqrm - square a size x size row-major matrix of doubles.
 *
 * buffer is zeroed and then filled with the product src * src. The product
 * is then copied back into src with rows and columns swapped
 * (src[j][i] = buffer[i][j]), so on return buffer holds src * src and src
 * holds its transpose.
 * NOTE(review): the transposed write-back is preserved from the original
 * code; confirm that callers really expect the transpose in src.
 *
 * The loop nest is selected at compile time:
 *   SEPARATED   - plain i-k-j triple loop.
 *   BLOCK_ROW   - i and k are tiled by BLOCK_SIZE, j spans the whole row.
 *   BLOCK_BLOCK - i, k and j are all tiled by BLOCK_SIZE.
 * If none of these macros is defined, the function only zeroes buffer.
 *
 * In every variant the outermost i loop is the one marked for OpenMP, and
 * each i iteration writes only row i of buffer.
 *
 * src    - input matrix, size * size doubles; overwritten (see above).
 * buffer - scratch/output, size * size doubles; must not overlap src.
 * size   - matrix dimension; nothing is done when size <= 0.
 */
static void sqrm(double *src, double *buffer, int size)
{
    if (size <= 0) {
        return;
    }

    /* Widen before multiplying so size * size cannot overflow int. */
    const size_t n = (size_t)size;

    memset(buffer, 0, n * n * sizeof(double));

#if defined(SEPARATED)
    #pragma omp parallel for
    for (int i = 0; i < size; i++) {
        for (int k = 0; k < size; k++) {
            for (int j = 0; j < size; j++) {
                buffer[i * n + j] += src[i * n + k] * src[k * n + j];
            }
        }
    }
#elif defined(BLOCK_ROW)
    #pragma omp parallel for
    for (int ib = 0; ib < size; ib += BLOCK_SIZE) {
        int ie = ib + BLOCK_SIZE;
        if (size < ie) {
            ie = size;
        }
        for (int kb = 0; kb < size; kb += BLOCK_SIZE) {
            int ke = kb + BLOCK_SIZE;
            if (size < ke) {
                ke = size;
            }
            for (int ii = ib; ii < ie; ii++) {
                for (int kk = kb; kk < ke; kk++) {
                    for (int jj = 0; jj < size; jj++) {
                        buffer[ii * n + jj] += src[ii * n + kk] * src[kk * n + jj];
                    }
                }
            }
        }
    }
#elif defined(BLOCK_BLOCK)
    #pragma omp parallel for
    for (int ib = 0; ib < size; ib += BLOCK_SIZE) {
        int ie = ib + BLOCK_SIZE;
        if (size < ie) {
            ie = size;
        }
        for (int kb = 0; kb < size; kb += BLOCK_SIZE) {
            int ke = kb + BLOCK_SIZE;
            if (size < ke) {
                ke = size;
            }
            for (int jb = 0; jb < size; jb += BLOCK_SIZE) {
                int je = jb + BLOCK_SIZE;
                if (size < je) {
                    je = size;
                }
                for (int ii = ib; ii < ie; ii++) {
                    for (int kk = kb; kk < ke; kk++) {
                        /* Bound must be jj < je; the previous "je < je"
                         * was always false and skipped the whole tile. */
                        for (int jj = jb; jj < je; jj++) {
                            buffer[ii * n + jj] += src[ii * n + kk] * src[kk * n + jj];
                        }
                    }
                }
            }
        }
    }
#endif

#if defined(SEPARATED) || defined(BLOCK_ROW) || defined(BLOCK_BLOCK)
    /*
     * Write-back runs as its own loop after the multiply because the
     * multiply reads all of src. Each i iteration writes column i of src,
     * so iterations touch disjoint elements.
     */
    #pragma omp parallel for
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            src[j * n + i] = buffer[i * n + j];
        }
    }
#endif
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement