Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <algorithm>
#include <cfloat>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <limits>
#include <random>
#include <utility>
#include <vector>
// Dimensions, in elements, of every inner block.
// INNER_MAT1_COLS doubles as the row count of the second operand's blocks.
constexpr int INNER_MAT1_ROWS = 8;
constexpr int INNER_MAT1_COLS = 8;
constexpr int INNER_MAT2_COLS = 8;

// Dimensions, in blocks, of the outer matrices.
// OUTER_MAT1_COLS doubles as the outer row count of the second operand.
constexpr int OUTER_MAT1_ROWS = 128;
constexpr int OUTER_MAT1_COLS = 128;
constexpr int OUTER_MAT2_COLS = 128;
// Writes `matrix` to `os`: a leading newline, then one line per row in which
// every element is followed by a single space.
// Returns `os` so the call can be chained.
template <typename T>
static std::ostream& outputMatrix(std::ostream& os, const std::vector<std::vector<T>>& matrix)
{
    // '\n' instead of std::endl: no need to flush the stream after every row.
    os << '\n';
    for (const auto& row : matrix)
    {
        for (const auto& el : row)
        {
            os << el << ' ';
        }
        os << '\n';
    }
    return os;
}
// Fills the m x n row-pointer matrix `matrix` with uniformly distributed
// random values of floating-point type T.
//
// The bounds come from std::numeric_limits<T> (smallest positive normal value
// up to the largest finite value), so the range is valid for float and
// long double as well as double; for double it equals [DBL_MIN, DBL_MAX].
// NOTE(review): samples from this range are mostly huge, so products of two
// of them will usually overflow to infinity - confirm that is intended.
template <typename T>
static void generateRandomRealMatrix(T** matrix, const int m, const int n)
{
    std::random_device rd;
    std::mt19937 generator(rd());
    std::uniform_real_distribution<T> dist(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            matrix[i][j] = dist(generator);
        }
    }
}
// Allocates a rows x columns matrix as an array of separately allocated rows.
// Every element is value-initialised (zero for arithmetic T), so the result
// can be used directly as an accumulator by the `+=`-based multiply routines.
// The caller owns the memory and releases it with freeMatrix().
template <typename T>
static T** getMatrix(const int rows, const int columns)
{
    T** matrix = new T*[rows];
    for (int i = 0; i < rows; i++)
    {
        matrix[i] = new T[columns]();
    }
    return matrix;
}
// Baseline product that accumulates mat1 * mat2 into res with the innermost
// loop marked as not to be vectorised.
//   mat1 is rows x cols, mat2 is cols x rows, res is rows x rows.
// res is only added to, never cleared, so it must hold the desired initial
// values (normally zeros) on entry.
template <typename T>
static void unvectMulpiply(T** res, T** mat1, T** mat2, const int rows, const int cols)
{
    for (int outRow = 0; outRow < rows; ++outRow)
    {
        for (int outCol = 0; outCol < rows; ++outCol)
        {
            #pragma loop(no_vector)
            for (int inner = 0; inner < cols; ++inner)
            {
                res[outRow][outCol] += mat1[outRow][inner] * mat2[inner][outCol];
            }
        }
    }
}
// Accumulates mat1 * mat2 into res using an i-j-k loop order whose innermost
// loop walks one row of res and one row of mat2 contiguously.
//   mat1 is rows x cols, mat2 is cols x mat2Cols, res is rows x mat2Cols.
// res is only added to, never cleared, so it must hold the desired initial
// values (normally zeros) on entry. The __restrict qualifiers promise that
// the three row-pointer arrays do not alias each other.
//
// `#pragma vector aligned` was removed: it asserts that every access in the
// loop is suitably aligned, which rows obtained from plain `new T[]` do not
// guarantee.
template <typename T>
static void vectMultiply(T** __restrict res, T** __restrict mat1, T** __restrict mat2, const int rows, const int cols, const int mat2Cols)
{
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            // Loop-invariant factor for the whole innermost loop.
            const T el = mat1[i][j];
            #pragma vector always
            for (int k = 0; k < mat2Cols; k++)
            {
                res[i][k] += el * mat2[j][k];
            }
        }
    }
}
// Element-wise exact comparison of two rows x cols row-pointer matrices.
// Returns true when no pair of corresponding elements compares unequal
// (trivially true when rows or cols is not positive).
template <typename T>
static bool areEquals(T** mat1, T** mat2, const int rows, const int cols)
{
    bool identical = true;
    for (int row = 0; identical && row < rows; ++row)
    {
        for (int col = 0; identical && col < cols; ++col)
        {
            identical = !(mat1[row][col] != mat2[row][col]);
        }
    }
    return identical;
}
// Releases a matrix made of `rows` separately allocated rows plus the array
// of row pointers itself (the layout produced by getMatrix()).
// A null `matrix` is accepted and ignored, mirroring `delete[] nullptr`.
template <typename T>
static void freeMatrix(T** matrix, const int rows)
{
    if (matrix == nullptr)
    {
        return;
    }
    for (int i = 0; i < rows; i++)
    {
        delete[] matrix[i];
    }
    delete[] matrix;
}
// Calls `function(args...)` once and returns the elapsed wall time as a whole
// number of milliseconds.
//
// std::chrono::steady_clock is used because it is monotonic;
// high_resolution_clock may alias the adjustable system clock, which can make
// a measured interval jump or even go negative.
template <typename Callable, typename ... Args>
auto countTime(Callable function, Args&& ... args)
{
    using Clock = std::chrono::steady_clock;
    const auto start = Clock::now();
    function(std::forward<Args>(args)...);
    const auto finish = Clock::now();
    return std::chrono::duration_cast<std::chrono::milliseconds>(finish - start).count();
}
// Element-wise exact comparison of two flat, row-major rows x cols matrices.
// Returns false at the first differing element, true otherwise.
template <typename T>
static bool _areEquals(T* mat1, T* mat2, const int rows, const int cols)
{
    for (int row = 0; row < rows; ++row)
    {
        const int rowStart = cols * row;
        for (int col = 0; col < cols; ++col)
        {
            const int at = rowStart + col;
            if (mat1[at] != mat2[at])
            {
                return false;
            }
        }
    }
    return true;
}
// Allocates one flat array holding rows * cols elements of type T.
// The elements are default-initialised (indeterminate for arithmetic T);
// the caller owns the array and releases it with delete[].
template <typename T>
static T* _getMatrix(const int rows, const int cols)
{
    // Widen both factors first so the element count is computed in 64 bits.
    const int64_t wideCols = static_cast<int64_t>(cols);
    const int64_t wideRows = static_cast<int64_t>(rows);
    T* storage = new T[wideCols * wideRows];
    return storage;
}
// Reference product on flat row-major matrices: res += mat1 * mat2.
//   mat1 is mat1Rows x mat1Cols, mat2 is mat1Cols x mat2Cols,
//   res is mat1Rows x mat2Cols.
// res is only added to, never cleared, so it must hold the desired initial
// values (normally zeros) on entry.
template <typename T>
static void _testMultiply(T* res, T* mat1, T* mat2, const int mat1Rows, const int mat1Cols, const int mat2Cols)
{
    for (int i = 0; i < mat1Rows; i++)
    {
        const int lhsRowStart = i * mat1Cols;
        // The result has mat2Cols columns, so iterate over all of them;
        // bounding this loop by mat1Cols was only correct for square operands.
        for (int j = 0; j < mat2Cols; j++)
        {
            const int outIndex = i * mat2Cols + j;
            for (int k = 0; k < mat1Cols; k++)
            {
                res[outIndex] += mat1[lhsRowStart + k] * mat2[mat2Cols * k + j];
            }
        }
    }
}
#pragma optimize("", off)
// Baseline block-matrix product with optimisation and vectorisation disabled
// through the surrounding pragmas: res += mat1 * mat2.
//
// Each outer matrix is a flat, row-major array of pointers to inner blocks,
// and each inner block is a flat, row-major array of T:
//   mat1: outerMat1Rows x outerMat1Cols blocks of innerMat1Rows x innerMat1Cols
//   mat2: outerMat1Cols x outerMat2Cols blocks of innerMat1Cols x innerMat2Cols
//   res : outerMat1Rows x outerMat2Cols blocks of innerMat1Rows x innerMat2Cols
// res is only added to, never cleared, so its blocks must hold the desired
// initial values (normally zeros) on entry.
template <typename T>
static void _unvectMultiply(T** res, T** mat1, T** mat2, const int outerMat1Rows, const int outerMat1Cols, const int outerMat2Cols,
    const int innerMat1Rows, const int innerMat1Cols, const int innerMat2Cols)
{
    // mat1Cols == mat2Rows
    // res.Rows == mat1Rows
    // res.Cols == mat2Cols
    const int innerMatrixSize = innerMat1Rows * innerMat2Cols;
    // Scratch block for one inner product. Owned by a vector so it is released
    // on every exit path (the previous raw allocation was never freed).
    std::vector<T> scratch(innerMatrixSize > 0 ? innerMatrixSize : 0);
    T* const m = scratch.data();
    #pragma loop(no_vector)
    for (int oi = 0; oi < outerMat1Rows; oi++)
    {
        #pragma loop(no_vector)
        for (int oj = 0; oj < outerMat2Cols; oj++)
        {
            T* const r = res[outerMat2Cols * oi + oj];
            #pragma loop(no_vector)
            for (int ok = 0; ok < outerMat1Cols; ok++)
            {
                // res[i][j] += mat1[i][k] * mat2[k][j];
                const T* const m1 = mat1[outerMat1Cols * oi + ok]; // inner matrix1
                const T* const m2 = mat2[outerMat2Cols * ok + oj]; // inner matrix2
                // Reset the scratch block; std::fill is valid for any T,
                // unlike a byte-wise memset.
                std::fill(scratch.begin(), scratch.end(), T());
                #pragma loop(no_vector)
                for (int ii = 0; ii < innerMat1Rows; ii++)
                {
                    #pragma loop(no_vector)
                    for (int ij = 0; ij < innerMat2Cols; ij++)
                    {
                        #pragma loop( no_vector )
                        for (int ik = 0; ik < innerMat1Cols; ik++)
                        {
                            m[ii * innerMat2Cols + ij] += m1[innerMat1Cols * ii + ik] * m2[innerMat2Cols * ik + ij];
                        }
                    }
                }
                // Accumulate the finished block product into the result block.
                #pragma loop(no_vector)
                for (int i = 0; i < innerMat1Rows; i++)
                {
                    int index = i * innerMat2Cols;
                    #pragma loop(no_vector)
                    for (int j = 0; j < innerMat2Cols; j++, index++)
                    {
                        r[index] += m[index];
                    }
                }
            }
        }
    }
}
#pragma optimize("", on)
// Block-matrix product written so that every innermost loop walks contiguous
// memory, which is the shape a compiler can auto-vectorise: res += mat1 * mat2.
//
// Each outer matrix is a flat, row-major array of pointers to inner blocks,
// and each inner block is a flat, row-major array of T:
//   mat1: outerMat1Rows x outerMat1Cols blocks of innerMat1Rows x innerMat1Cols
//   mat2: outerMat1Cols x outerMat2Cols blocks of innerMat1Cols x innerMat2Cols
//   res : outerMat1Rows x outerMat2Cols blocks of innerMat1Rows x innerMat2Cols
// res is only added to, never cleared, so its blocks must hold the desired
// initial values (normally zeros) on entry. The __restrict qualifiers promise
// that the three block-pointer arrays do not alias each other.
template <typename T>
static void _vectMultiply(T** __restrict res, T** __restrict mat1, T** __restrict mat2,
    const int outerMat1Rows, const int outerMat1Cols, const int outerMat2Cols,
    const int innerMat1Rows, const int innerMat1Cols, const int innerMat2Cols)
{
    const int innerMatrixSize = innerMat1Rows * innerMat2Cols;
    // Scratch block for one inner product. Owned by a vector so it is released
    // on every exit path (the previous raw allocation was never freed).
    std::vector<T> scratch(innerMatrixSize > 0 ? innerMatrixSize : 0);
    T* const m = scratch.data();
    for (int oi = 0; oi < outerMat1Rows; oi++)
    {
        for (int oj = 0; oj < outerMat1Cols; oj++)
        {
            const T* const m1 = mat1[outerMat1Cols * oi + oj];
            for (int ok = 0; ok < outerMat2Cols; ok++)
            {
                // res[i][k] += m1 * mat2[j][k]
                T* const r = res[outerMat2Cols * oi + ok];
                const T* const m2 = mat2[outerMat2Cols * oj + ok];
                // Reset the scratch block; std::fill is valid for any T,
                // unlike a byte-wise memset.
                std::fill(scratch.begin(), scratch.end(), T());
                for (int ii = 0; ii < innerMat1Rows; ii++)
                {
                    for (int ij = 0; ij < innerMat1Cols; ij++)
                    {
                        const T el = m1[innerMat1Cols * ii + ij];
                        for (int ik = 0; ik < innerMat2Cols; ik++)
                        {
                            m[innerMat2Cols * ii + ik] += el * m2[innerMat2Cols * ij + ik];
                        }
                    }
                }
                // Accumulate the finished block product into the result block.
                for (int i = 0; i < innerMat1Rows; i++)
                {
                    for (int j = 0; j < innerMat2Cols; j++)
                    {
                        r[innerMat2Cols * i + j] += m[innerMat2Cols * i + j];
                    }
                }
            }
        }
    }
}
// Block-matrix product: res += mat1 * mat2.
// Despite the name, this body uses no intrinsics; it runs the same loop nest
// as _vectMultiply.
//
// Each outer matrix is a flat, row-major array of pointers to inner blocks,
// and each inner block is a flat, row-major array of T:
//   mat1: outerMat1Rows x outerMat1Cols blocks of innerMat1Rows x innerMat1Cols
//   mat2: outerMat1Cols x outerMat2Cols blocks of innerMat1Cols x innerMat2Cols
//   res : outerMat1Rows x outerMat2Cols blocks of innerMat1Rows x innerMat2Cols
// res is only added to, never cleared, so its blocks must hold the desired
// initial values (normally zeros) on entry. The __restrict qualifiers promise
// that the three block-pointer arrays do not alias each other.
template <typename T>
static void _intrinsicMultiply(T** __restrict res, T** __restrict mat1, T** __restrict mat2,
    const int outerMat1Rows, const int outerMat1Cols, const int outerMat2Cols,
    const int innerMat1Rows, const int innerMat1Cols, const int innerMat2Cols)
{
    const int innerMatrixSize = innerMat1Rows * innerMat2Cols;
    // Scratch block for one inner product. Owned by a vector so it is released
    // on every exit path (the previous raw allocation was never freed).
    std::vector<T> scratch(innerMatrixSize > 0 ? innerMatrixSize : 0);
    T* const m = scratch.data();
    for (int oi = 0; oi < outerMat1Rows; oi++)
    {
        for (int oj = 0; oj < outerMat1Cols; oj++)
        {
            const T* const m1 = mat1[outerMat1Cols * oi + oj];
            for (int ok = 0; ok < outerMat2Cols; ok++)
            {
                // res[i][k] += m1 * mat2[j][k]
                T* const r = res[outerMat2Cols * oi + ok];
                const T* const m2 = mat2[outerMat2Cols * oj + ok];
                // Reset the scratch block; std::fill is valid for any T,
                // unlike a byte-wise memset.
                std::fill(scratch.begin(), scratch.end(), T());
                for (int ii = 0; ii < innerMat1Rows; ii++)
                {
                    for (int ij = 0; ij < innerMat1Cols; ij++)
                    {
                        const T el = m1[innerMat1Cols * ii + ij];
                        for (int ik = 0; ik < innerMat2Cols; ik++)
                        {
                            m[innerMat2Cols * ii + ik] += el * m2[innerMat2Cols * ij + ik];
                        }
                    }
                }
                // Accumulate the finished block product into the result block.
                for (int i = 0; i < innerMat1Rows; i++)
                {
                    for (int j = 0; j < innerMat2Cols; j++)
                    {
                        r[innerMat2Cols * i + j] += m[innerMat2Cols * i + j];
                    }
                }
            }
        }
    }
}
// Fills the flat, row-major rows x cols matrix `m` with uniformly distributed
// random values of floating-point type T.
//
// The bounds come from std::numeric_limits<T> (smallest positive normal value
// up to the largest finite value), so the range is valid for float and
// long double as well as double; for double it equals [DBL_MIN, DBL_MAX].
// NOTE(review): samples from this range are mostly huge, so products of two
// of them will usually overflow to infinity - confirm that is intended.
template <typename T>
static void _generateRandRealMatrix(T* m, const int rows, const int cols)
{
    std::random_device rd;
    std::mt19937 generator(rd());
    std::uniform_real_distribution<T> dist(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
    for (int i = 0; i < rows; i++)
    {
        int index = cols * i;
        for (int j = 0; j < cols; j++, index++)
        {
            m[index] = dist(generator);
        }
    }
}
// Allocates an outerRows x outerCols block matrix: a flat, row-major array of
// pointers, each pointing at its own flat innerRows x innerCols block.
//
// When genRandomReal is true every block is filled by
// _generateRandRealMatrix(). Otherwise the blocks stay value-initialised
// (zero for arithmetic T), so they can be used directly as accumulators by
// the `+=`-based multiply routines instead of holding indeterminate values.
// The caller owns the result and releases it with deleteMatrixOfMatrixes().
template <typename T>
static T** _getMatrixOfMatrixes(const int outerRows, const int outerCols, const int innerRows, const int innerCols, bool genRandomReal = true)
{
    T** matrix = new T*[(static_cast<int64_t>(outerCols)) * (static_cast<int64_t>(outerRows))];
    const int innerSize = innerRows * innerCols;
    for (int i = 0; i < outerRows; i++)
    {
        int index = outerCols * i;
        for (int j = 0; j < outerCols; j++, index++)
        {
            matrix[index] = new T[innerSize]();
            if (genRandomReal)
            {
                _generateRandRealMatrix(matrix[index], innerRows, innerCols);
            }
        }
    }
    return matrix;
}
// Releases a block matrix: every inner block of the outerRows x outerCols
// grid is freed with delete[], then the array of block pointers itself.
// innerRows and innerCols are accepted for signature symmetry but not used.
template <typename T>
static void deleteMatrixOfMatrixes(T** matrix, const int outerRows, const int outerCols, const int innerRows, const int innerCols)
{
    for (int blockRow = 0; blockRow < outerRows; ++blockRow)
    {
        const int rowStart = outerCols * blockRow;
        for (int blockCol = 0; blockCol < outerCols; ++blockCol)
        {
            delete[] matrix[rowStart + blockCol];
        }
    }
    delete[] matrix;
}
// Exact element-wise comparison of two block matrices with identical layout:
// outerRows x outerCols blocks, each a flat innerRows x innerCols array.
// Returns false at the first differing element, true otherwise.
template <typename T>
static bool areEqualMatrixesOfMatrixes(T** mat1, T** mat2, const int outerRows, const int outerCols, const int innerRows, const int innerCols)
{
    for (int blockRow = 0; blockRow < outerRows; ++blockRow)
    {
        for (int blockCol = 0; blockCol < outerCols; ++blockCol)
        {
            const int block = outerCols * blockRow + blockCol;
            T* lhs = mat1[block];
            T* rhs = mat2[block];
            for (int row = 0; row < innerRows; ++row)
            {
                const int rowStart = innerCols * row;
                for (int col = 0; col < innerCols; ++col)
                {
                    if (lhs[rowStart + col] != rhs[rowStart + col])
                    {
                        return false;
                    }
                }
            }
        }
    }
    return true;
}
- int main()
- {
- double** mat1 = _getMatrixOfMatrixes<double>(OUTER_MAT1_ROWS, OUTER_MAT1_COLS, INNER_MAT1_ROWS, INNER_MAT1_COLS);
- double** mat2 = _getMatrixOfMatrixes<double>(OUTER_MAT1_COLS, OUTER_MAT2_COLS, INNER_MAT1_COLS, INNER_MAT2_COLS);
- double** res = _getMatrixOfMatrixes<double>(OUTER_MAT1_ROWS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS, false);
- double** res1 = _getMatrixOfMatrixes<double>(OUTER_MAT1_ROWS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS, false);
- //double** res2 = _getMatrixOfMatrixes<double>(OUTER_MAT1_ROWS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS, false);
- std::cout <<
- countTime<decltype(_vectMultiply<double>)>(_vectMultiply, res1, mat1, mat2, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT1_COLS, INNER_MAT2_COLS) << std::endl;
- std::cout << countTime<decltype(_unvectMultiply<double>)>(_unvectMultiply, res, mat1, mat2, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT1_COLS, INNER_MAT2_COLS) << std::endl;
- //_vectMultiply(res1, mat1, mat2, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT1_COLS, INNER_MAT2_COLS);
- //_unvectMultiply(res, mat1, mat2, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT1_COLS, INNER_MAT2_COLS);
- //_intrinsicMultiply(res2, mat1, mat2, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT1_COLS, INNER_MAT2_COLS);
- std::cout << areEqualMatrixesOfMatrixes(res, res1, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS) << std::endl;
- //std::cout << areEqualMatrixesOfMatrixes(res1, res2, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS) << std::endl;
- deleteMatrixOfMatrixes(mat1, OUTER_MAT1_ROWS, OUTER_MAT1_COLS, INNER_MAT1_ROWS, INNER_MAT1_COLS);
- deleteMatrixOfMatrixes(mat2, OUTER_MAT1_COLS, OUTER_MAT2_COLS, INNER_MAT1_COLS, INNER_MAT2_COLS);
- deleteMatrixOfMatrixes(res, OUTER_MAT1_ROWS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS);
- deleteMatrixOfMatrixes(res1, OUTER_MAT1_ROWS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS);
- //deleteMatrixOfMatrixes(res2, OUTER_MAT1_ROWS, OUTER_MAT2_COLS, INNER_MAT1_ROWS, INNER_MAT2_COLS);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement