Advertisement
overwater

Untitled

Mar 17th, 2016
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.84 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <omp.h>
  4. #include <stdlib.h>
  5. using namespace std;
  6.  
  7.  
  8.  
  9. // (n x m) * (m x k) = (n * k)
  10. vector < vector <int> > simple_multiply_external_cycle(const vector < vector <int> > &a, const vector < vector <int> > &b, int num_threads) {
  11.     int n = a.size(), m = a[0].size(), k = b[0].size();
  12.     vector < vector <int> > c(n, vector <int>(k, 0));
  13.  
  14.     #pragma omp parallel for num_threads(num_threads)
  15.     for (int i = 0; i < n; ++i) {
  16.         for (int j = 0; j < k; ++j)
  17.             for (int z = 0; z < m; ++z)
  18.                 c[i][j] += a[i][z] * b[z][j];
  19.     }
  20.     return c;
  21. }
  22.  
  23. vector < vector <int> > simple_multiply_internal_cycle(const vector < vector <int> > &a, const vector < vector <int> > &b, int num_threads) {
  24.     int n = a.size(), m = a[0].size(), k = b[0].size();
  25.     vector < vector <int> > c(n, vector <int>(k, 0));
  26.  
  27.     for (int i = 0; i < n; ++i)
  28.         #pragma omp parallel for num_threads(num_threads)
  29.         for (int j = 0; j < k; ++j)
  30.             for (int z = 0; z < m; ++z)
  31.                 c[i][j] += a[i][z] * b[z][j];
  32.  
  33.     return c;
  34. }
  35.  
  36. vector < vector <int> > hack_multiply_external_cycle(const vector < vector <int> > &a, const vector < vector <int> > &b, int num_threads) {
  37.     int n = a.size(), m = a[0].size(), k = b[0].size();
  38.     vector < vector <int> > c(n, vector <int>(k, 0));
  39.  
  40.     #pragma omp parallel for num_threads(num_threads)
  41.     for (int i = 0; i < n; ++i) {
  42.         for (int z = 0; z < m; ++z)
  43.             for (int j = 0; j < k; ++j)
  44.                 c[i][j] += a[i][z] * b[z][j];
  45.     }
  46.     return c;
  47. }
  48.  
  49. vector < vector <int> > hack_multiply_internal_cycle(const vector < vector <int> > &a, const vector < vector <int> > &b, int num_threads) {
  50.     int n = a.size(), m = a[0].size(), k = b[0].size();
  51.     vector < vector <int> > c(n, vector <int>(k, 0));
  52.  
  53.     for (int i = 0; i < n; ++i)
  54.         #pragma omp parallel for num_threads(num_threads)
  55.         for (int z = 0; z < m; ++z)
  56.             for (int j = 0; j < k; ++j)
  57.                 c[i][j] += a[i][z] * b[z][j];
  58.     return c;
  59. }
  60.  
  61. double get_time(double start_time) {
  62.     return omp_get_wtime() - start_time;
  63. }
  64.  
  65. void generate(vector < vector <int> > &a, vector < vector <int> > &b, int seed) {
  66.     srand(seed);
  67.     for (int i = 0; i < a.size(); ++i)
  68.     for (int j = 0; j < a[i].size(); ++j)
  69.         a[i][j] = rand() % 201 - 100;
  70.  
  71.     for (int i = 0; i < b.size(); ++i)
  72.     for (int j = 0; j < b[i].size(); ++j)
  73.         b[i][j] = rand() % 201 - 100;
  74. }
  75.  
  76. bool check(vector <vector <int> > answer, vector < vector <int> > test) {
  77.     for (int i = 0; i < answer.size(); ++i)
  78.     for (int j = 0; j < answer[i].size(); ++j)
  79.     if (answer[i][j] != test[i][j])
  80.         return false;
  81.     return true;
  82. }
  83.  
  84. void mult_block(const vector < vector <int> > &a, const vector < vector <int> > &b, vector < vector <int> > &c, int r, int i_gl, int j_gl, int z_gl) {
  85.     for (int i = i_gl * r; i < (i_gl + 1) * r; ++i) {
  86.         for (int j = j_gl * r; j < (j_gl + 1) * r; ++j) {
  87.             for (int z = z_gl * r; z < (z_gl + 1) * r; ++z) {
  88.                 c[i][j] += a[i][z] * b[z][j];
  89.             }
  90.         }
  91.     }
  92. }
  93.  
  94. vector < vector <int> > block_multiply_external_cycle(const vector < vector <int> > &a, const vector < vector <int> > &b, int num_threads, int r) {
  95.     int n1 = a.size() / r, m1 = a[0].size() / r, k1 = b[0].size() / r;
  96.     vector < vector <int> > c(n1 * r, vector <int>(k1 * r, 0));
  97.  
  98.     #pragma omp parallel for num_threads(num_threads)
  99.     for (int i = 0; i < n1; ++i)
  100.         for (int j = 0; j < k1; ++j)
  101.             for (int z = 0; z < m1; ++z)
  102.                 mult_block(a, b, c, r, i, j, z);
  103.     return c;
  104. }
  105.  
  106. vector < vector <int> > block_multiply_internal_cycle(const vector < vector <int> > &a, const vector < vector <int> > &b, int num_threads, int r) {
  107.     int n1 = a.size(), m1 = a[0].size() / r, k1 = b[0].size() / r;
  108.     vector < vector <int> > c(n1 * r, vector <int>(k1 * r, 0));
  109.  
  110.     for (int i = 0; i < n1; ++i)
  111.         #pragma omp parallel for num_threads(num_threads)
  112.         for (int j = 0; j < k1; ++j)
  113.             for (int z = 0; z < m1; ++z)
  114.                 mult_block(a, b, c, r, i, j, z);
  115.     return c;
  116. }
  117.  
  118.  
  119. int main() {
  120.     int size = 1024;
  121.     vector < vector <int> > a(size, vector <int>(size)), b(size, vector <int>(size));
  122.     generate(a, b, 1);
  123.     cout << "Generating complete.\n";
  124.  
  125.     vector <vector <int> > c = simple_multiply_external_cycle(a, b, 1);
  126.     cout << "Testing external cycle (block size = 8):\n";
  127.     for (int i = 1; i < 9; ++i) {
  128.         double start_time = omp_get_wtime();
  129.         vector <vector <int> > s = simple_multiply_external_cycle(a, b, i);
  130.         double simple_time = get_time(start_time);
  131.         start_time = omp_get_wtime();
  132.         vector <vector <int> > h = hack_multiply_external_cycle(a, b, i);
  133.         double hack_time = get_time(start_time);
  134.         start_time = omp_get_wtime();
  135.         vector <vector <int> > bl = block_multiply_external_cycle(a, b, i, 8);
  136.         double block_time = get_time(start_time);
  137.         cout << i << " threads:\n   s = " << simple_time << ", correct = " << check(c, s) <<
  138.             "\n   h = " << hack_time << ", correct = " << check(c, h) <<
  139.             "\n   b = " << block_time << ", correct = " << check(c, bl) << endl;
  140.     }
  141.  
  142.     return 0;
  143. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement