Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/* Helpers for m_mult.  All matrices are dense and row-major.  `inner' is the
   shared dimension (columns of a / rows of b) and is also the row stride of
   a; `bc_stride' is the row stride shared by b and c (their column count).
   Offsets are carried as long long so they cannot overflow int.  */

/* Dot product of one row of a (at `a') with one column of b (at `b').  */
static double
m_mult_dot (const double *a, const double *b,
            long long inner, long long bc_stride)
{
  double sum = 0;
  long long k;

  for (k = 0; k < inner; k++, b += bc_stride)
    sum += a[k] * b[0];
  return sum;
}

/* One row of a times four adjacent columns of b; writes c[0..3].  */
static void
m_mult_tile_1x4 (const double *a, const double *b, double *c,
                 long long inner, long long bc_stride)
{
  double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
  long long k;

  for (k = 0; k < inner; k++, b += bc_stride)
    {
      s0 += a[k] * b[0];
      s1 += a[k] * b[1];
      s2 += a[k] * b[2];
      s3 += a[k] * b[3];
    }
  c[0] = s0;
  c[1] = s1;
  c[2] = s2;
  c[3] = s3;
}

/* Four consecutive rows of a times one column of b; writes one element in
   each of four consecutive rows of c.  */
static void
m_mult_tile_4x1 (const double *a, const double *b, double *c,
                 long long inner, long long bc_stride)
{
  const double *a0 = a;
  const double *a1 = a0 + inner;
  const double *a2 = a1 + inner;
  const double *a3 = a2 + inner;
  double s0 = 0, s1 = 0, s2 = 0, s3 = 0;
  long long k;

  for (k = 0; k < inner; k++, b += bc_stride)
    {
      s0 += a0[k] * b[0];
      s1 += a1[k] * b[0];
      s2 += a2[k] * b[0];
      s3 += a3[k] * b[0];
    }
  c[0] = s0;
  c[bc_stride] = s1;
  c[2 * bc_stride] = s2;
  c[3 * bc_stride] = s3;
}

/* Four consecutive rows of a times four adjacent columns of b; writes a 4x4
   tile of c.  The sixteen sums are kept in separate scalars and stored only
   after the k loop finishes.  */
static void
m_mult_tile_4x4 (const double *a, const double *b, double *c,
                 long long inner, long long bc_stride)
{
  const double *a0 = a;
  const double *a1 = a0 + inner;
  const double *a2 = a1 + inner;
  const double *a3 = a2 + inner;
  double *c0 = c;
  double *c1 = c0 + bc_stride;
  double *c2 = c1 + bc_stride;
  double *c3 = c2 + bc_stride;
  double s00 = 0, s01 = 0, s02 = 0, s03 = 0;
  double s10 = 0, s11 = 0, s12 = 0, s13 = 0;
  double s20 = 0, s21 = 0, s22 = 0, s23 = 0;
  double s30 = 0, s31 = 0, s32 = 0, s33 = 0;
  long long k;

  for (k = 0; k < inner; k++, b += bc_stride)
    {
      s00 += a0[k] * b[0];
      s01 += a0[k] * b[1];
      s02 += a0[k] * b[2];
      s03 += a0[k] * b[3];
      s10 += a1[k] * b[0];
      s11 += a1[k] * b[1];
      s12 += a1[k] * b[2];
      s13 += a1[k] * b[3];
      s20 += a2[k] * b[0];
      s21 += a2[k] * b[1];
      s22 += a2[k] * b[2];
      s23 += a2[k] * b[3];
      s30 += a3[k] * b[0];
      s31 += a3[k] * b[1];
      s32 += a3[k] * b[2];
      s33 += a3[k] * b[3];
    }
  c0[0] = s00;
  c0[1] = s01;
  c0[2] = s02;
  c0[3] = s03;
  c1[0] = s10;
  c1[1] = s11;
  c1[2] = s12;
  c1[3] = s13;
  c2[0] = s20;
  c2[1] = s21;
  c2[2] = s22;
  c2[3] = s23;
  c3[0] = s30;
  c3[1] = s31;
  c3[2] = s32;
  c3[3] = s33;
}

/* m_mult - dense row-major matrix product, c = a * b.

   a  N x M input;  element (r, k) is a[r * M + k].
   b  M x K input;  element (k, q) is b[k * K + q].
   c  N x K output; element (r, q) is c[r * K + q].  Every element is
      overwritten; c is never read.
   N, M, K  dimensions.  If M or K is 0, both are replaced by N, so
      m_mult (a, b, c, N, 0, 0) multiplies two N x N matrices.

   After that defaulting, the call is a no-op when N <= 0 or K <= 0 (there
   is no output element to produce).  A negative M is treated as an empty
   inner dimension, so c is filled with zeros.

   The result is computed in four passes: full 4x4 tiles, the leftover
   (N mod 4) rows against 4-column groups, 4-row groups against the leftover
   (K mod 4) columns, and finally the leftover corner.  Each output element
   is accumulated over k = 0 .. M-1 in increasing order.  */
void
m_mult (double *a, double *b, double *c, int N, int M, int K)
{
  long long rows, cols, inner;
  long long rows4, cols4;
  long long i, j;

  if (M == 0 || K == 0)
    M = K = N;

  /* No output elements.  This also stops a negative K from turning the
     leftover-column start (K - (K & 3)) into a negative offset.  */
  if (N <= 0 || K <= 0)
    return;

  /* Empty sums; clamping keeps the row-pointer arithmetic in range.  */
  if (M < 0)
    M = 0;

  rows = N;
  cols = K;
  inner = M;
  rows4 = rows - (rows & 3);    /* largest multiple of 4 <= rows */
  cols4 = cols - (cols & 3);    /* largest multiple of 4 <= cols */

  /* Pass 1: full 4x4 tiles.  */
  for (i = 0; i < rows4; i += 4)
    for (j = 0; j < cols4; j += 4)
      m_mult_tile_4x4 (a + i * inner, b + j, c + i * cols + j, inner, cols);

  /* Pass 2: leftover rows against 4-column groups.  */
  for (i = rows4; i < rows; i++)
    for (j = 0; j < cols4; j += 4)
      m_mult_tile_1x4 (a + i * inner, b + j, c + i * cols + j, inner, cols);

  /* Pass 3: 4-row groups against leftover columns.  */
  for (i = 0; i < rows4; i += 4)
    for (j = cols4; j < cols; j++)
      m_mult_tile_4x1 (a + i * inner, b + j, c + i * cols + j, inner, cols);

  /* Pass 4: leftover rows against leftover columns.  */
  for (i = rows4; i < rows; i++)
    for (j = cols4; j < cols; j++)
      c[i * cols + j] = m_mult_dot (a + i * inner, b + j, inner, cols);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement