// Copyright 2018 Delft University of Technology
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// OpenMP:
#include <omp.h>

#include "../matmult.hpp"

#define IMPROVEMENT_X

/* You may not remove these pragmas: */
/*************************************/
#pragma GCC push_options
#pragma GCC optimize ("O1")
/*************************************/

Matrix<float> multiplyMatricesOMP(Matrix<float> a,
                                  Matrix<float> b,
                                  int num_threads) {
  //Verifies the dimensions of the matrices; multiplication is only possible if a's columns match b's rows
  if (a.columns != b.rows) {
    throw std::domain_error("Matrix dimensions do not allow matrix-multiplication.");
  }
  //Creates the result matrix
  auto result = Matrix<float>(a.rows, b.columns);
  //Gives OpenMP the number of threads to be initialized
  omp_set_num_threads(num_threads);

  //This version is not as efficient as it could be:
  //by dividing the rows of matrix a over the threads,
  //some threads might get less work than others, depending on the size of the matrix and the number of threads,
  //meaning time is wasted waiting for the threads that were given more work.
#ifdef FIRST_ATEMPT
  //Initializes the threads
  #pragma omp parallel
  {
    //Gets the ID of its own thread
    int ID = omp_get_thread_num();
    //Divides the rows of matrix a among the threads
    for (size_t r = ID * a.rows / num_threads; r < (ID + 1) * a.rows / num_threads; r++) {
      //For each column
      for (size_t c = 0; c < b.columns; c++) {
        //Initializes the variable for the sum of the products and sets it to 0
        float sum = 0.0;
        //Accumulate the product of the elements
        for (size_t i = 0; i < b.rows; i++) {
          sum += a(r, i) * b(i, c);
        }

        //Store the result
        result(r, c) = sum;
      }
    }
  }
#endif

  //By dividing the calculation of the individual elements of the result matrix over the threads,
  //the difference in work between the threads becomes smaller.
#ifdef IMPROVEMENT_X
  //Initializes the threads
  #pragma omp parallel
  {
    //Gets the ID of its own thread
    int ID = omp_get_thread_num();

    //Divides the elements of the result matrix among the threads
    for (size_t idx = ID; idx < result.rows * result.columns; idx += num_threads) {
      //Initializes the variable for the sum of the products and sets it to 0
      float sum = 0.0;
      //Goes along a row of matrix a and multiplies each element by the corresponding element of a column in b
      for (size_t c = 0; c < a.columns; c++) {
        //Accumulate the product of the elements
        sum += a(idx / result.columns, c) * b(c, idx % result.columns);
      }
      //Store the result
      result[idx] = sum;
    }
  }
#endif

  return result;
}

Matrix<double> multiplyMatricesOMP(Matrix<double> a,
                                   Matrix<double> b,
                                   int num_threads) {
  //Verifies the dimensions of the matrices; multiplication is only possible if a's columns match b's rows
  if (a.columns != b.rows) {
    throw std::domain_error("Matrix dimensions do not allow matrix-multiplication.");
  }
  //Creates the result matrix
  auto result = Matrix<double>(a.rows, b.columns);
  //Gives OpenMP the number of threads to be initialized
  omp_set_num_threads(num_threads);

  //This version is not as efficient as it could be:
  //by dividing the rows of matrix a over the threads,
  //some threads might get less work than others, depending on the size of the matrix and the number of threads,
  //meaning time is wasted waiting for the threads that were given more work.
#ifdef FIRST_ATEMPT
  //Initializes the threads
  #pragma omp parallel
  {
    //Gets the ID of its own thread
    int ID = omp_get_thread_num();
    //Divides the rows of matrix a among the threads
    for (size_t r = ID * a.rows / num_threads; r < (ID + 1) * a.rows / num_threads; r++) {
      //For each column
      for (size_t c = 0; c < b.columns; c++) {
        //Initializes the variable for the sum of the products and sets it to 0
        double sum = 0.0;
        //Accumulate the product of the elements
        for (size_t i = 0; i < b.rows; i++) {
          sum += a(r, i) * b(i, c);
        }

        //Store the result
        result(r, c) = sum;
      }
    }
  }
#endif

  //By dividing the calculation of the individual elements of the result matrix over the threads,
  //the difference in work between the threads becomes smaller.
#ifdef IMPROVEMENT_X
  //Initializes the threads
  #pragma omp parallel
  {
    //Gets the ID of its own thread
    int ID = omp_get_thread_num();

    //Divides the elements of the result matrix among the threads
    for (size_t idx = ID; idx < result.rows * result.columns; idx += num_threads) {
      //Initializes the variable for the sum of the products and sets it to 0
      double sum = 0.0;
      //Goes along a row of matrix a and multiplies each element by the corresponding element of a column in b
      for (size_t c = 0; c < a.columns; c++) {
        //Accumulate the product of the elements
        //The row of matrix a is obtained by dividing the flat index into the result matrix by its number of columns
        //The column of matrix b is the remainder of dividing that index by the number of columns in the result
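        //For example, with result.columns == 4, the flat index idx == 7 maps to row 7 / 4 == 1 and column 7 % 4 == 3 of the result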
        sum += a(idx / result.columns, c) * b(c, idx % result.columns);
      }
      //Store the result
      result[idx] = sum;
    }
  }
#endif
  return result;
}
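
//The manual striding by thread ID above can also be expressed with OpenMP's own loop scheduling.
//This is only a hedged alternative sketch, not part of the original submission: the guard macro
//MATMULT_OMP_FOR_SKETCH is hypothetical and never defined here, and the unsigned loop counter
//requires OpenMP 3.0 or newer.
#ifdef MATMULT_OMP_FOR_SKETCH
Matrix<double> multiplyMatricesOMPFor(Matrix<double> a,
                                      Matrix<double> b,
                                      int num_threads) {
  if (a.columns != b.rows) {
    throw std::domain_error("Matrix dimensions do not allow matrix-multiplication.");
  }
  auto result = Matrix<double>(a.rows, b.columns);
  omp_set_num_threads(num_threads);
  //Lets the OpenMP runtime distribute the flattened result indices over the threads
  #pragma omp parallel for schedule(static)
  for (size_t idx = 0; idx < result.rows * result.columns; idx++) {
    double sum = 0.0;
    for (size_t c = 0; c < a.columns; c++) {
      sum += a(idx / result.columns, c) * b(c, idx % result.columns);
    }
    result[idx] = sum;
  }
  return result;
}
#endif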
#pragma GCC pop_options
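
//A minimal usage sketch, not part of the original file: it assumes Matrix<T> exposes a
//(rows, columns) constructor and the (row, column) element accessor used above, as provided
//by "../matmult.hpp". The MATMULT_USAGE_EXAMPLE guard is hypothetical and never defined here,
//so this block does not interfere with the rest of the translation unit.
#ifdef MATMULT_USAGE_EXAMPLE
#include <iostream>

int main() {
  //Builds a small 2x3 and 3x2 operand pair with arbitrary values
  Matrix<float> a(2, 3);
  Matrix<float> b(3, 2);
  for (size_t r = 0; r < a.rows; r++) {
    for (size_t c = 0; c < a.columns; c++) {
      a(r, c) = static_cast<float>(r + c);
    }
  }
  for (size_t r = 0; r < b.rows; r++) {
    for (size_t c = 0; c < b.columns; c++) {
      b(r, c) = static_cast<float>(r * c);
    }
  }
  //Multiplies with 4 OpenMP threads; the result is a 2x2 matrix
  auto result = multiplyMatricesOMP(a, b, 4);
  std::cout << result(0, 0) << " " << result(0, 1) << std::endl;
  return 0;
}
#endif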