Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdlib.h>

#include <iostream>
#include <stdexcept>
#include <vector>

#include <omp.h>

using namespace std;
// (n x m) * (m x k) = (n x k)
// Multiplies two integer matrices using the i-j-z loop order, with the
// outermost (row) loop distributed across OpenMP threads.
//
// a            left operand, n x m; m is taken from a[0].
// b            right operand; k is taken from b[0]. It must provide at least
//              m rows of k elements each (not validated here).
// num_threads  value forwarded to the OpenMP num_threads clause.
//
// Returns the n x k product. An empty `a` yields an empty result and an
// empty `b` yields n rows with zero columns.
std::vector<std::vector<int> > simple_multiply_external_cycle(const std::vector<std::vector<int> > &a, const std::vector<std::vector<int> > &b, int num_threads) {
    // Guard the a[0] / b[0] lookups so empty operands are not indexed.
    const int n = static_cast<int>(a.size());
    const int m = a.empty() ? 0 : static_cast<int>(a[0].size());
    const int k = b.empty() ? 0 : static_cast<int>(b[0].size());
    std::vector<std::vector<int> > c(n, std::vector<int>(k, 0));
    // Each iteration of i writes only row i of c, so threads never share a cell.
    #pragma omp parallel for num_threads(num_threads)
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < k; ++j) {
            int sum = 0;  // accumulate locally and store once per cell
            for (int z = 0; z < m; ++z)
                sum += a[i][z] * b[z][j];
            c[i][j] = sum;
        }
    }
    return c;
}
// Multiplies two integer matrices using the i-j-z loop order, with the
// column (j) loop distributed across OpenMP threads for every row.
//
// a            left operand, n x m; m is taken from a[0].
// b            right operand; k is taken from b[0]. It must provide at least
//              m rows of k elements each (not validated here).
// num_threads  value forwarded to the OpenMP num_threads clause.
//
// Returns the n x k product. An empty `a` yields an empty result and an
// empty `b` yields n rows with zero columns.
std::vector<std::vector<int> > simple_multiply_internal_cycle(const std::vector<std::vector<int> > &a, const std::vector<std::vector<int> > &b, int num_threads) {
    // Guard the a[0] / b[0] lookups so empty operands are not indexed.
    const int n = static_cast<int>(a.size());
    const int m = a.empty() ? 0 : static_cast<int>(a[0].size());
    const int k = b.empty() ? 0 : static_cast<int>(b[0].size());
    std::vector<std::vector<int> > c(n, std::vector<int>(k, 0));
    for (int i = 0; i < n; ++i) {
        // A parallel region is opened once per row; each j iteration writes
        // only c[i][j], so threads never share a cell.
        #pragma omp parallel for num_threads(num_threads)
        for (int j = 0; j < k; ++j) {
            int sum = 0;  // accumulate locally and store once per cell
            for (int z = 0; z < m; ++z)
                sum += a[i][z] * b[z][j];
            c[i][j] = sum;
        }
    }
    return c;
}
// Multiplies two integer matrices using the i-z-j loop order (the innermost
// loop walks a row of b and a row of c), with the outermost (row) loop
// distributed across OpenMP threads.
//
// a            left operand, n x m; m is taken from a[0].
// b            right operand; k is taken from b[0]. It must provide at least
//              m rows of k elements each (not validated here).
// num_threads  value forwarded to the OpenMP num_threads clause.
//
// Returns the n x k product. An empty `a` yields an empty result and an
// empty `b` yields n rows with zero columns.
std::vector<std::vector<int> > hack_multiply_external_cycle(const std::vector<std::vector<int> > &a, const std::vector<std::vector<int> > &b, int num_threads) {
    // Guard the a[0] / b[0] lookups so empty operands are not indexed.
    const int n = static_cast<int>(a.size());
    const int m = a.empty() ? 0 : static_cast<int>(a[0].size());
    const int k = b.empty() ? 0 : static_cast<int>(b[0].size());
    std::vector<std::vector<int> > c(n, std::vector<int>(k, 0));
    // Each iteration of i writes only row i of c, so threads never share a cell.
    #pragma omp parallel for num_threads(num_threads)
    for (int i = 0; i < n; ++i) {
        for (int z = 0; z < m; ++z) {
            const int a_iz = a[i][z];  // invariant across the j loop
            for (int j = 0; j < k; ++j)
                c[i][j] += a_iz * b[z][j];
        }
    }
    return c;
}
// Multiplies two integer matrices using the i-z-j loop order, parallelising
// the work inside each row of the result rather than across rows.
//
// a            left operand, n x m; m is taken from a[0].
// b            right operand; k is taken from b[0]. It must provide at least
//              m rows of k elements each (not validated here).
// num_threads  value forwarded to the OpenMP num_threads clause.
//
// Returns the n x k product. An empty `a` yields an empty result and an
// empty `b` yields n rows with zero columns.
std::vector<std::vector<int> > hack_multiply_internal_cycle(const std::vector<std::vector<int> > &a, const std::vector<std::vector<int> > &b, int num_threads) {
    // Guard the a[0] / b[0] lookups so empty operands are not indexed.
    const int n = static_cast<int>(a.size());
    const int m = a.empty() ? 0 : static_cast<int>(a[0].size());
    const int k = b.empty() ? 0 : static_cast<int>(b[0].size());
    std::vector<std::vector<int> > c(n, std::vector<int>(k, 0));
    for (int i = 0; i < n; ++i) {
        // The z loop must not be split across threads: every z iteration
        // updates the same cells c[i][0..k), so sharing z would race on
        // `c[i][j] +=`. Instead every thread of the team walks the whole z
        // loop and only the j loop is workshared, giving each cell exactly
        // one writer per z step. The implicit barrier at the end of each
        // `omp for` keeps the team in step.
        #pragma omp parallel num_threads(num_threads)
        for (int z = 0; z < m; ++z) {
            const int a_iz = a[i][z];  // invariant across the j loop
            #pragma omp for
            for (int j = 0; j < k; ++j)
                c[i][j] += a_iz * b[z][j];
        }
    }
    return c;
}
- double get_time(double start_time) {
- return omp_get_wtime() - start_time;
- }
// Seeds rand() with `seed`, then overwrites every element of `a` followed by
// every element of `b` (row by row) with rand() % 201 - 100.
// The matrices keep their existing dimensions.
void generate(std::vector<std::vector<int> > &a, std::vector<std::vector<int> > &b, int seed) {
    srand(seed);
    // Fill order matters for reproducibility: all of a first, then all of b.
    std::vector<std::vector<int> > *targets[2] = { &a, &b };
    for (int t = 0; t < 2; ++t) {
        std::vector<std::vector<int> > &matrix = *targets[t];
        for (size_t row = 0; row < matrix.size(); ++row) {
            std::vector<int> &cells = matrix[row];
            for (size_t col = 0; col < cells.size(); ++col)
                cells[col] = rand() % 201 - 100;
        }
    }
}
// Returns true when `test` has exactly the same shape and contents as
// `answer`, false otherwise.
//
// Both matrices are taken by const reference so no copy is made per call,
// and a shape mismatch is reported as a failure instead of reading `test`
// out of range or ignoring its extra rows/columns.
bool check(const std::vector<std::vector<int> > &answer, const std::vector<std::vector<int> > &test) {
    // std::vector's operator== compares sizes first, then element by element.
    return answer == test;
}
// Adds the product of one r x r tile of `a` and one r x r tile of `b` into
// the matching r x r tile of `c`.
//
// i_gl, j_gl, z_gl are tile indices: the tile of c covers rows
// [i_gl*r, (i_gl+1)*r) and columns [j_gl*r, (j_gl+1)*r); the inner dimension
// covers [z_gl*r, (z_gl+1)*r). No bounds are checked, so all three ranges
// must lie inside the matrices.
void mult_block(const std::vector<std::vector<int> > &a, const std::vector<std::vector<int> > &b, std::vector<std::vector<int> > &c, int r, int i_gl, int j_gl, int z_gl) {
    const int row_begin = i_gl * r;
    const int row_end = (i_gl + 1) * r;
    const int col_begin = j_gl * r;
    const int col_end = (j_gl + 1) * r;
    const int inner_begin = z_gl * r;
    const int inner_end = (z_gl + 1) * r;

    for (int row = row_begin; row < row_end; ++row) {
        const std::vector<int> &a_row = a[row];
        std::vector<int> &c_row = c[row];
        for (int col = col_begin; col < col_end; ++col) {
            int &cell = c_row[col];
            for (int inner = inner_begin; inner < inner_end; ++inner)
                cell += a_row[inner] * b[inner][col];
        }
    }
}
// Multiplies two integer matrices tile by tile (tiles of at most r x r),
// with the loop over row-tiles distributed across OpenMP threads.
//
// a            left operand, n x m; m is taken from a[0].
// b            right operand; k is taken from b[0]. It must provide at least
//              m rows of k elements each (not validated here).
// num_threads  value forwarded to the OpenMP num_threads clause.
// r            tile edge length; must be positive.
//
// Returns the full n x k product. Dimensions that are not multiples of r are
// handled by clamping the last tile in each direction, so no rows, columns
// or inner-dimension terms are dropped.
// Throws std::invalid_argument when r <= 0.
std::vector<std::vector<int> > block_multiply_external_cycle(const std::vector<std::vector<int> > &a, const std::vector<std::vector<int> > &b, int num_threads, int r) {
    if (r <= 0)
        throw std::invalid_argument("block_multiply_external_cycle: block size r must be positive");

    // Guard the a[0] / b[0] lookups so empty operands are not indexed.
    const int n = static_cast<int>(a.size());
    const int m = a.empty() ? 0 : static_cast<int>(a[0].size());
    const int k = b.empty() ? 0 : static_cast<int>(b[0].size());

    // Tile counts are rounded up so a trailing partial tile is still visited.
    const int row_tiles = n / r + (n % r != 0 ? 1 : 0);
    const int inner_tiles = m / r + (m % r != 0 ? 1 : 0);
    const int col_tiles = k / r + (k % r != 0 ? 1 : 0);

    std::vector<std::vector<int> > c(n, std::vector<int>(k, 0));
    // Each row-tile owns a disjoint set of rows of c, so threads never share a cell.
    #pragma omp parallel for num_threads(num_threads)
    for (int ti = 0; ti < row_tiles; ++ti) {
        const int i_begin = ti * r;
        const int i_end = (n - i_begin < r) ? n : i_begin + r;
        for (int tj = 0; tj < col_tiles; ++tj) {
            const int j_begin = tj * r;
            const int j_end = (k - j_begin < r) ? k : j_begin + r;
            for (int tz = 0; tz < inner_tiles; ++tz) {
                const int z_begin = tz * r;
                const int z_end = (m - z_begin < r) ? m : z_begin + r;
                // Accumulate this tile's contribution into c.
                for (int i = i_begin; i < i_end; ++i) {
                    for (int j = j_begin; j < j_end; ++j) {
                        int sum = c[i][j];
                        for (int z = z_begin; z < z_end; ++z)
                            sum += a[i][z] * b[z][j];
                        c[i][j] = sum;
                    }
                }
            }
        }
    }
    return c;
}
// Multiplies two integer matrices tile by tile (tiles of at most r x r),
// with the loop over column-tiles distributed across OpenMP threads for
// every row-tile.
//
// a            left operand, n x m; m is taken from a[0].
// b            right operand; k is taken from b[0]. It must provide at least
//              m rows of k elements each (not validated here).
// num_threads  value forwarded to the OpenMP num_threads clause.
// r            tile edge length; must be positive.
//
// Returns the full n x k product. The number of row-tiles is derived from
// a.size() / r (rounded up), and the last tile in each direction is clamped,
// so the result has exactly n rows and no element is read out of range.
// Throws std::invalid_argument when r <= 0.
std::vector<std::vector<int> > block_multiply_internal_cycle(const std::vector<std::vector<int> > &a, const std::vector<std::vector<int> > &b, int num_threads, int r) {
    if (r <= 0)
        throw std::invalid_argument("block_multiply_internal_cycle: block size r must be positive");

    // Guard the a[0] / b[0] lookups so empty operands are not indexed.
    const int n = static_cast<int>(a.size());
    const int m = a.empty() ? 0 : static_cast<int>(a[0].size());
    const int k = b.empty() ? 0 : static_cast<int>(b[0].size());

    // Tile counts are rounded up so a trailing partial tile is still visited.
    const int row_tiles = n / r + (n % r != 0 ? 1 : 0);
    const int inner_tiles = m / r + (m % r != 0 ? 1 : 0);
    const int col_tiles = k / r + (k % r != 0 ? 1 : 0);

    std::vector<std::vector<int> > c(n, std::vector<int>(k, 0));
    for (int ti = 0; ti < row_tiles; ++ti) {
        const int i_begin = ti * r;
        const int i_end = (n - i_begin < r) ? n : i_begin + r;
        // A parallel region is opened once per row-tile; each column-tile
        // owns a disjoint set of columns of c, so threads never share a cell.
        #pragma omp parallel for num_threads(num_threads)
        for (int tj = 0; tj < col_tiles; ++tj) {
            const int j_begin = tj * r;
            const int j_end = (k - j_begin < r) ? k : j_begin + r;
            for (int tz = 0; tz < inner_tiles; ++tz) {
                const int z_begin = tz * r;
                const int z_end = (m - z_begin < r) ? m : z_begin + r;
                // Accumulate this tile's contribution into c.
                for (int i = i_begin; i < i_end; ++i) {
                    for (int j = j_begin; j < j_end; ++j) {
                        int sum = c[i][j];
                        for (int z = z_begin; z < z_end; ++z)
                            sum += a[i][z] * b[z][j];
                        c[i][j] = sum;
                    }
                }
            }
        }
    }
    return c;
}
- int main() {
- int size = 1024;
- vector < vector <int> > a(size, vector <int>(size)), b(size, vector <int>(size));
- generate(a, b, 1);
- cout << "Generating complete.\n";
- vector <vector <int> > c = simple_multiply_external_cycle(a, b, 1);
- cout << "Testing external cycle (block size = 8):\n";
- for (int i = 1; i < 9; ++i) {
- double start_time = omp_get_wtime();
- vector <vector <int> > s = simple_multiply_external_cycle(a, b, i);
- double simple_time = get_time(start_time);
- start_time = omp_get_wtime();
- vector <vector <int> > h = hack_multiply_external_cycle(a, b, i);
- double hack_time = get_time(start_time);
- start_time = omp_get_wtime();
- vector <vector <int> > bl = block_multiply_external_cycle(a, b, i, 8);
- double block_time = get_time(start_time);
- cout << i << " threads:\n s = " << simple_time << ", correct = " << check(c, s) <<
- "\n h = " << hack_time << ", correct = " << check(c, h) <<
- "\n b = " << block_time << ", correct = " << check(c, bl) << endl;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement