Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Copyright 2018 Delft University of Technology
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // OpenMP:
- #include <omp.h>
- #include "../matmult.hpp"
- #define IMPROVEMENT_X
- /* You may not remove these pragmas: */
- /*************************************/
- #pragma GCC push_options
- #pragma GCC optimize ("O1")
- /*************************************/
- Matrix<float> multiplyMatricesOMP(Matrix<float> a,
- Matrix<float> b,
- int num_threads) {
- //Verifies the dimension of the matrixes, it must be possible to multiply the matrixes
- if (a.columns != b.rows){
- throw std::domain_error("Matrix dimensions do not allow matrix-multiplication.");
- }
- //Creates result matrix
- auto result = Matrix<float>(a.rows , b.columns);
- //Gives OpenMP the number of threads to be initialized
- omp_set_num_threads(num_threads);
- //This version is not as eficient as it could be.
- //The fact that by dividing rows of matrix a to each thread,
- //some threads might work less than others, dependig on the size of the matrix and number of threads.
- //Meadning there is time wasted in waiting for threads with more work than others
- #ifdef FIRST_ATEMPT
- //Initializes the threads
- #pragma omp parallel
- {
- //Gets the ID from it's own thread
- int ID = omp_get_thread_num();
- //Divides the rows for each thread to work with
- for (size_t r = ID * a.rows / num_threads ; r < (ID + 1) * a.rows / num_threads ; r++) {
- //For each column
- for (size_t c = 0; columns < b.columns; c++) {
- //Initializes the variable for the sum of the elements and sets it to 0
- float sum = 0.0;
- //Accumulate the product of the elements
- for (size_t i = 0; i < b.rows; i++) {
- sum += a(r, i) * b(i, c);
- }
- //Store the result
- result(r, c) = sum;
- }
- }
- }
- #endif
- //By dividing each calculation of the elements in the matrix result for each thread,
- //the diference of work between each thread becomes smaller.
- #ifdef IMPROVEMENT_N1
- //Initializes the threads
- #pragma omp parallel
- {
- //Gets the ID from it's own thread
- int ID = omp_get_thread_num();
- //Divides each element od the result matrix for each thread to work with
- for (size_t idx = ID ; idx < (result.rows - 1) * result.columns + result.columns ; idx += num_threads) {
- //Initializes the variable for the sum of the elements and sets it to 0
- float sum = 0.0;
- //Goes through the elements of each column of matrix a and multiplies to the coresponding element of a row in b
- for (size_t c = 0; c < a.columns; c++) {
- //Accumulate the product of the elements
- sum += a(idx / result.columns, c) * b(c, idx % result.columns);
- }
- //Store the result
- result[idx] = sum;
- }
- }
- #endif
- }
- return result;
- }
- Matrix<double> multiplyMatricesOMP(Matrix<double> a,
- Matrix<double> b,
- int num_threads) {
- //Verifies the dimension of the matrixes, it must be possible to multiply the matrixes
- if (a.columns != b.rows){
- throw std::domain_error("Matrix dimensions do not allow matrix-multiplication.");
- }
- //Creates result matrixhttps://github.com/pa
- auto result = Matrix<double>(a.rows , b.columns);
- //Gives OpenMP the number of threads to be initialized
- omp_set_num_threads(num_threads);
- //This version is not as eficient as it could be.
- //The fact that by dividing rows of matrix a to each thread,
- //some threads might work less than others, dependig on the size of the matrix and number of threads.
- //Meadning there is time wasted in waiting for threads with more work than others
- #ifdef FIRST_ATEMPT
- //Initializes the threads
- #pragma omp parallel
- {
- //Gets the ID from it's own thread
- int ID = omp_get_thread_num();
- //Divides the rows for each thread to work with
- for (size_t r = ID * a.rows / num_threads ; r < (ID + 1) * a.rows / num_threads ; r++) {
- //For each column
- for (size_t c = 0; columns < b.columns; c++) {
- //Initializes the variable for the sum of the elements and sets it to 0
- double sum = 0.0;
- //Accumulate the product of the elements
- for (size_t i = 0; i < b.rows; i++) {
- sum += a(r, i) * b(i, c);
- }
- //Store the result
- result(r, c) = sum;
- }
- }
- }
- #endif
- //By dividing each calculation of the elements in the matrix result for each thread,
- //the diference of work between each thread becomes smaller.
- #ifdef IMPROVEMENT_X
- //Initializes the threads
- #pragma omp parallel
- {
- //Gets the ID from it's own thread
- int ID = omp_get_thread_num();
- //Divides each element od the result matrix for each thread to work with
- for (size_t idx = ID ; idx < (result.rows - 1) * result.columns + result.columns ; idx += num_threads) {
- //Initializes the variable for the sum of the elements and sets it to 0
- double sum = 0.0;
- //Goes through the elements of each column of matrix a and multiplies to the coresponding element of a row in b
- for (size_t c = 0; c < a.columns; c++) {
- //Accumulate the product of the elements
- //The row of matrix a is assessed by diviing the index of the result matrix by the number of columns
- //The column of the matrix b is calculated by using the remainder of the division of the index by the number of columns in result
- sum += a(idx / result.columns, c) * b(c, idx % result.columns);
- }
- //Store the result
- result[idx] = sum;
- }
- }
- #endif
- return result;
- }
- #pragma GCC pop_options
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement