// Untitled

// Copyright 2018 Delft University of Technology
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// OpenMP:
#include <omp.h>

#include "../matmult.hpp"

#define IMPROVEMENT_X

/* You may not remove these pragmas: */
/*************************************/
#pragma GCC push_options
#pragma GCC optimize ("O1")
/*************************************/

/**
 * @brief Multiplies two single-precision matrices in parallel using OpenMP.
 *
 * Every element of the result is an independent dot product of one row of
 * @p a with one column of @p b. The elements are enumerated through a single
 * flattened index and handed to the OpenMP worksharing loop, so the work is
 * balanced per element instead of per row (splitting by rows leaves some
 * threads idle when the row count is not a multiple of the team size).
 *
 * @param a            Left-hand operand.
 * @param b            Right-hand operand; b.rows must equal a.columns.
 * @param num_threads  Requested number of OpenMP threads. Non-positive values
 *                     are ignored and the runtime's current setting is used.
 * @return A matrix with a.rows rows and b.columns columns holding a * b.
 * @throws std::domain_error if a.columns differs from b.rows.
 */
Matrix<float> multiplyMatricesOMP(Matrix<float> a,
                                  Matrix<float> b,
                                  int num_threads) {
  // The inner dimensions must agree, otherwise the product is undefined.
  if (a.columns != b.rows) {
    throw std::domain_error("Matrix dimensions do not allow matrix-multiplication.");
  }

  // Allocate the output matrix.
  auto result = Matrix<float>(a.rows, b.columns);

  // omp_set_num_threads expects a positive value; skip the call otherwise so
  // a bad argument cannot put the runtime into an implementation-defined state.
  if (num_threads > 0) {
    omp_set_num_threads(num_threads);
  }

  // Loop bounds are hoisted into locals so the parallel loop has invariant limits.
  const size_t inner = a.columns;
  const size_t out_columns = result.columns;
  const size_t total = result.rows * result.columns;

  // The runtime distributes the iterations over however many threads it
  // actually started, so no element is skipped if the team is smaller than
  // the requested num_threads. When total is 0 the body never runs, which
  // also keeps the division by out_columns safe.
  #pragma omp parallel for schedule(static)
  for (size_t idx = 0; idx < total; idx++) {
    // Recover the (row, column) position from the flattened element index.
    const size_t r = idx / out_columns;
    const size_t c = idx % out_columns;

    // Accumulate the dot product of row r of a and column c of b.
    float sum = 0.0f;
    for (size_t i = 0; i < inner; i++) {
      sum += a(r, i) * b(i, c);
    }

    // Each iteration writes a distinct element, so no synchronization is needed.
    result(r, c) = sum;
  }

  return result;
}

/**
 * @brief Multiplies two double-precision matrices in parallel using OpenMP.
 *
 * Every element of the result is an independent dot product of one row of
 * @p a with one column of @p b. The elements are enumerated through a single
 * flattened index and handed to the OpenMP worksharing loop, so the work is
 * balanced per element instead of per row (splitting by rows leaves some
 * threads idle when the row count is not a multiple of the team size).
 *
 * @param a            Left-hand operand.
 * @param b            Right-hand operand; b.rows must equal a.columns.
 * @param num_threads  Requested number of OpenMP threads. Non-positive values
 *                     are ignored and the runtime's current setting is used.
 * @return A matrix with a.rows rows and b.columns columns holding a * b.
 * @throws std::domain_error if a.columns differs from b.rows.
 */
Matrix<double> multiplyMatricesOMP(Matrix<double> a,
                                   Matrix<double> b,
                                   int num_threads) {
  // The inner dimensions must agree, otherwise the product is undefined.
  if (a.columns != b.rows) {
    throw std::domain_error("Matrix dimensions do not allow matrix-multiplication.");
  }

  // Allocate the output matrix.
  auto result = Matrix<double>(a.rows, b.columns);

  // omp_set_num_threads expects a positive value; skip the call otherwise so
  // a bad argument cannot put the runtime into an implementation-defined state.
  if (num_threads > 0) {
    omp_set_num_threads(num_threads);
  }

  // Loop bounds are hoisted into locals so the parallel loop has invariant limits.
  const size_t inner = a.columns;
  const size_t out_columns = result.columns;
  const size_t total = result.rows * result.columns;

  // The runtime distributes the iterations over however many threads it
  // actually started, so no element is skipped if the team is smaller than
  // the requested num_threads. When total is 0 the body never runs, which
  // also keeps the division by out_columns safe.
  #pragma omp parallel for schedule(static)
  for (size_t idx = 0; idx < total; idx++) {
    // Recover the (row, column) position from the flattened element index.
    const size_t r = idx / out_columns;
    const size_t c = idx % out_columns;

    // Accumulate the dot product of row r of a and column c of b.
    double sum = 0.0;
    for (size_t i = 0; i < inner; i++) {
      sum += a(r, i) * b(i, c);
    }

    // Each iteration writes a distinct element, so no synchronization is needed.
    result(r, c) = sum;
  }

  return result;
}
#pragma GCC pop_options