#include <iostream>
#include <random>
#include <mpi.h>
#include <string.h>  // memset
#include <unistd.h>  // sleep (used only by commented-out debug code)
#include <stdlib.h>  // malloc, free, rand, exit
#include <string>
#include <vector>
#include <chrono>
#include <fstream>

#define PATH_TO_MATRICES "/home/mama/someMPI/ass/matrices.txt"
#define PATH_TO_RESULT "/home/mama/someMPI/ass/res.txt"
// Naive single-process multiplication of an n1 x m1 by an n2 x m2 matrix (row-major).
double *multiplyMatrix(double *matrix1, double *matrix2, size_t n1, size_t m1, size_t n2, size_t m2)
{
    if (m1 != n2)
    {
        std::cout << "ERROR: matrices are incompatible" << std::endl;
        exit(-1);
    }
    double *resMatrix = (double *)malloc(n1 * m2 * sizeof(double));
    for (size_t i = 0; i < n1; i++)
    {
        for (size_t j = 0; j < m2; j++)
        {
            double element = 0;
            for (size_t u = 0; u < m1; u++)
            {
                element += matrix1[i * m1 + u] * matrix2[u * m2 + j];
            }
            resMatrix[i * m2 + j] = element;
        }
    }
    return resMatrix;
}
// Prints an n x m row-major matrix to stdout.
void matrixPrinter(double *matrix, size_t n, size_t m)
{
    for (size_t i = 0; i < n; i++)
    {
        for (size_t j = 0; j < m; j++)
        {
            std::cout << matrix[i * m + j] << " ";
        }
        std::cout << std::endl;
    }
}
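
// A minimal, hypothetical sanity check for multiplyMatrix (this helper is not part of
// the original program and is not called from main): it multiplies a 2x3 matrix by a
// 3x2 matrix whose product is known in advance.
void multiplyMatrixSanityCheck()
{
    double a[6] = {1, 2, 3, 4, 5, 6};    // 2x3
    double b[6] = {7, 8, 9, 10, 11, 12}; // 3x2
    double *c = multiplyMatrix(a, b, 2, 3, 3, 2);
    matrixPrinter(c, 2, 2); // expected: 58 64 / 139 154
    free(c);
}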
// Fills the matrix with pseudo-random integers in [1, 100].
void matrixRandomiser(double *matrix, size_t n, size_t m)
{
    for (size_t i = 0; i < n * m; i++)
    {
        matrix[i] = rand() % 100 + 1;
    }
}
// Writes a matrix to the file as "n m" followed by n rows of m values.
void writeMatrixToFile(double *matrix, size_t n, size_t m, std::ofstream &file)
{
    file << n << " " << m << std::endl;
    for (size_t i = 0; i < n; i++)
    {
        for (size_t j = 0; j < m; j++)
        {
            file << matrix[i * m + j] << " ";
        }
        file << std::endl;
    }
}
// Reads both matrices' dimensions from stdin, fills them with random values and
// writes them to the given file.
void createMatrixesAndWriteToFile(std::string path)
{
    std::ofstream fout;
    fout.open(path);
    std::cout << "Enter matrices dimensions" << std::endl;
    size_t n1, m1, n2, m2;
    std::cin >> n1 >> m1 >> n2 >> m2;
    double *matrix1 = (double *)malloc(sizeof(double) * n1 * m1);
    double *matrix2 = (double *)malloc(sizeof(double) * n2 * m2);
    matrixRandomiser(matrix1, n1, m1);
    matrixRandomiser(matrix2, n2, m2);
    writeMatrixToFile(matrix1, n1, m1, fout);
    writeMatrixToFile(matrix2, n2, m2, fout);
    fout.close();
    free(matrix1);
    free(matrix2);
}
// Simple smoke test: prints each process's rank and the total process count.
void testMPI(int &argc, char *argv[])
{
    int rank, number;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &number);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    std::cout << "Rank: " << rank << "\nNumber of processes: " << number << std::endl;
    MPI_Finalize();
}
// Reads one matrix: first its dimensions, then n * m values.
double *readMatrixFromFile(std::ifstream &fin, size_t &n, size_t &m)
{
    fin >> n >> m;
    double *matrix = (double *)malloc(sizeof(double) * n * m);
    for (size_t i = 0; i < n * m; i++)
    {
        fin >> matrix[i];
    }
    return matrix;
}
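
// For reference, readMatrixFromFile expects exactly the layout writeMatrixToFile
// produces: each matrix is stored as "n m" on one line followed by its rows.
// A hypothetical matrices.txt holding a 2x3 and a 3x2 matrix (values are only an
// example) would look like:
//
//   2 3
//   1 2 3
//   4 5 6
//   3 2
//   7 8
//   9 10
//   11 12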
// Baseline: multiplies the two matrices from the file in a single process and
// reports the elapsed time.
void testMul()
{
    size_t n1, m1, n2, m2;
    double *matrix1, *matrix2;
    std::ifstream fin;
    fin.open(PATH_TO_MATRICES);
    matrix1 = readMatrixFromFile(fin, n1, m1);
    matrix2 = readMatrixFromFile(fin, n2, m2);
    fin.close();
    auto start = std::chrono::high_resolution_clock::now();
    double *res = multiplyMatrix(matrix1, matrix2, n1, m1, n2, m2);
    auto stop = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
    std::cout << "Elapsed time (one process) " << duration.count() << " seconds" << std::endl;
    free(matrix1);
    free(matrix2);
    free(res);
}
// Root side of the blocking (MPI_Send) version: generates the matrices, splits both
// of them into row blocks, sends one block of each matrix to every worker and
// collects the final product with MPI_Reduce.
void processMatrixRoot(int numberOfProcesses)
{
    createMatrixesAndWriteToFile(PATH_TO_MATRICES);
    size_t n1, m1, n2, m2;
    std::ifstream fin;
    fin.open(PATH_TO_MATRICES);
    double *matrix1, *matrix2;
    matrix1 = readMatrixFromFile(fin, n1, m1);
    matrix2 = readMatrixFromFile(fin, n2, m2);
    fin.close();
    if (m1 != n2)
    {
        std::cout << "ERROR: matrices are incompatible" << std::endl;
        exit(-1);
    }
    std::cout << "OK" << std::endl;
    // matrixPrinter(matrix1, n1, m1);
    // matrixPrinter(matrix2, n2, m2);
    size_t numberOfRowsForEachProcessFirstMatrix = n1 / (numberOfProcesses - 1);
    size_t numberOfRowsForEachProcessSecondMatrix = n2 / (numberOfProcesses - 1);
    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 1; i < numberOfProcesses; i++)
    {
        // std::cout << "Sending dimensions to " << i << " process" << std::endl;
        MPI_Send(&n1, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD);
        MPI_Send(&m1, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD);
        MPI_Send(&n2, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD);
        MPI_Send(&m2, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD);
        // The last worker also takes the leftover rows when the row count does not
        // divide evenly among the workers (must match processMatrixSlave's split).
        size_t rowsFirstMatrix = numberOfRowsForEachProcessFirstMatrix;
        size_t rowsSecondMatrix = numberOfRowsForEachProcessSecondMatrix;
        if (i == numberOfProcesses - 1)
        {
            rowsFirstMatrix += n1 % (numberOfProcesses - 1);
            rowsSecondMatrix += n2 % (numberOfProcesses - 1);
        }
        MPI_Send(matrix1 + (i - 1) * numberOfRowsForEachProcessFirstMatrix * m1, m1 * rowsFirstMatrix, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
        MPI_Send(matrix2 + (i - 1) * numberOfRowsForEachProcessSecondMatrix * m2, m2 * rowsSecondMatrix, MPI_DOUBLE, i, 0, MPI_COMM_WORLD);
    }
    // The root contributes an all-zero operand to the reduction; the workers'
    // partial results are summed into res.
    double *res = (double *)malloc(sizeof(double) * n1 * m2);
    memset(res, 0, sizeof(double) * n1 * m2);
    double *fakeRes = (double *)malloc(sizeof(double) * n1 * m2);
    memset(fakeRes, 0, sizeof(double) * n1 * m2);
    MPI_Reduce(fakeRes, res, n1 * m2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    auto stop = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
    std::cout << "Elapsed time sync: " << duration.count() << " seconds" << std::endl;
    std::ofstream fout;
    fout.open(PATH_TO_RESULT);
    writeMatrixToFile(res, n1, m2, fout);
    fout.close();
    free(matrix1);
    free(matrix2);
    free(res);
    free(fakeRes);
}
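
// Note: the fakeRes buffer above exists only because the root must pass a send
// buffer of its own to MPI_Reduce. A sketch of an equivalent call that drops the
// extra allocation (keeping everything else as above) would be:
//
//   memset(res, 0, sizeof(double) * n1 * m2);
//   MPI_Reduce(MPI_IN_PLACE, res, n1 * m2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
//
// MPI_IN_PLACE is only valid at the root; the workers keep calling MPI_Reduce with
// their partial-result buffers as before.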
// Root side of the non-blocking (MPI_Isend) version. The distribution logic matches
// processMatrixRoot; the sends are merely posted asynchronously and completed with a
// single MPI_Waitall before the reduction.
void processMatrixRootAsync(int numberOfProcesses)
{
    size_t n1, m1, n2, m2;
    std::ifstream fin;
    fin.open(PATH_TO_MATRICES);
    double *matrix1, *matrix2;
    matrix1 = readMatrixFromFile(fin, n1, m1);
    matrix2 = readMatrixFromFile(fin, n2, m2);
    fin.close();
    if (m1 != n2)
    {
        std::cout << "ERROR: matrices are incompatible" << std::endl;
        exit(-1);
    }
    std::cout << "OK" << std::endl;
    size_t numberOfRowsForEachProcessFirstMatrix = n1 / (numberOfProcesses - 1);
    size_t numberOfRowsForEachProcessSecondMatrix = n2 / (numberOfProcesses - 1);
    // Six sends per worker: four dimensions plus one block of each matrix.
    std::vector<MPI_Request> requests(6 * (numberOfProcesses - 1));
    size_t r = 0;
    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 1; i < numberOfProcesses; i++)
    {
        // std::cout << "Sending dimensions to " << i << " process" << std::endl;
        MPI_Isend(&n1, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD, &requests[r++]);
        MPI_Isend(&m1, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD, &requests[r++]);
        MPI_Isend(&n2, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD, &requests[r++]);
        MPI_Isend(&m2, 1, MPI_UNSIGNED_LONG, i, 0, MPI_COMM_WORLD, &requests[r++]);
        // The last worker also takes the leftover rows, as in the blocking version.
        size_t rowsFirstMatrix = numberOfRowsForEachProcessFirstMatrix;
        size_t rowsSecondMatrix = numberOfRowsForEachProcessSecondMatrix;
        if (i == numberOfProcesses - 1)
        {
            rowsFirstMatrix += n1 % (numberOfProcesses - 1);
            rowsSecondMatrix += n2 % (numberOfProcesses - 1);
        }
        MPI_Isend(matrix1 + (i - 1) * numberOfRowsForEachProcessFirstMatrix * m1, m1 * rowsFirstMatrix, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &requests[r++]);
        MPI_Isend(matrix2 + (i - 1) * numberOfRowsForEachProcessSecondMatrix * m2, m2 * rowsSecondMatrix, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, &requests[r++]);
    }
    // Complete all outstanding sends before the buffers are reused or freed.
    MPI_Waitall((int)requests.size(), requests.data(), MPI_STATUSES_IGNORE);
    double *res = (double *)malloc(sizeof(double) * n1 * m2);
    memset(res, 0, sizeof(double) * n1 * m2);
    double *fakeRes = (double *)malloc(sizeof(double) * n1 * m2);
    memset(fakeRes, 0, sizeof(double) * n1 * m2);
    MPI_Reduce(fakeRes, res, n1 * m2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    auto stop = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
    std::cout << "Elapsed time async: " << duration.count() << " seconds" << std::endl;
    std::ofstream fout;
    fout.open(PATH_TO_RESULT);
    writeMatrixToFile(res, n1, m2, fout);
    fout.close();
    free(matrix1);
    free(matrix2);
    free(res);
    free(fakeRes);
}
// Worker side, shared by both versions. Each worker receives one row block of
// matrix1 and one row block of matrix2, computes the partial products it can with
// those blocks, then passes its matrix2 block around a ring of workers until every
// block has visited every worker. The accumulated partial result is combined on the
// root with MPI_Reduce.
void processMatrixSlave(int numberOfProcesses, int rank)
{
    rank--; // adjusted rank: workers are numbered 0 .. numberOfProcesses - 2
    // std::cout << "This is slave process\n Waiting for info...\n";
    size_t n1, m1, n2, m2;
    MPI_Status status;
    MPI_Recv(&n1, 1, MPI_UNSIGNED_LONG, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(&m1, 1, MPI_UNSIGNED_LONG, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(&n2, 1, MPI_UNSIGNED_LONG, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(&m2, 1, MPI_UNSIGNED_LONG, 0, 0, MPI_COMM_WORLD, &status);
    // std::cout << "Dimensions received!\n";
    int numberOfLinesFirstMatrix = n1 / (numberOfProcesses - 1);
    int numberOfLinesSecondMatrix = n2 / (numberOfProcesses - 1);
    int firstLineFirstMatrix = rank * numberOfLinesFirstMatrix;
    int firstLineSecondMatrix = rank * numberOfLinesSecondMatrix;
    // The last worker also owns the leftover rows (must match the root's split).
    if (rank == numberOfProcesses - 2)
    {
        numberOfLinesFirstMatrix += n1 % (numberOfProcesses - 1);
        numberOfLinesSecondMatrix += n2 % (numberOfProcesses - 1);
    }
    double *res = (double *)malloc(sizeof(double) * n1 * m2);
    memset(res, 0, sizeof(double) * n1 * m2);
    double *matrix1 = (double *)malloc(sizeof(double) * m1 * numberOfLinesFirstMatrix);
    double *matrix2 = (double *)malloc(sizeof(double) * m2 * numberOfLinesSecondMatrix);
    MPI_Recv(matrix1, m1 * numberOfLinesFirstMatrix, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(matrix2, m2 * numberOfLinesSecondMatrix, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    // std::cout << "Everything is received!" << std::endl;
    // One ring step per worker, so every matrix2 block is processed exactly once.
    for (int u = 0; u < numberOfProcesses - 1; u++)
    {
        for (size_t k = 0; k < m2; k++)
        {
            for (int i = firstLineFirstMatrix; i < firstLineFirstMatrix + numberOfLinesFirstMatrix; i++)
            {
                for (int j = firstLineSecondMatrix; j < firstLineSecondMatrix + numberOfLinesSecondMatrix; j++)
                {
                    // j is both a column of this worker's matrix1 rows and a row of
                    // the matrix2 block currently held.
                    res[i * m2 + k] += matrix1[(i - firstLineFirstMatrix) * m1 + j] * matrix2[(j - firstLineSecondMatrix) * m2 + k];
                }
            }
        }
        // Pass the current matrix2 block to the next worker in the ring and receive
        // the previous worker's block (together with its size and starting row).
        int dst = rank + 2;
        if (dst >= numberOfProcesses)
            dst = 1;
        int src = rank;
        if (rank == 0)
            src = numberOfProcesses - 1;
        int numberOfLinesTMP = numberOfLinesSecondMatrix;
        MPI_Sendrecv_replace(&numberOfLinesSecondMatrix, 1, MPI_INT, dst, 0, src, 0, MPI_COMM_WORLD, &status);
        MPI_Sendrecv_replace(&firstLineSecondMatrix, 1, MPI_INT, dst, 0, src, 0, MPI_COMM_WORLD, &status);
        double *matrix2N = (double *)malloc(sizeof(double) * numberOfLinesSecondMatrix * m2);
        MPI_Sendrecv(matrix2, numberOfLinesTMP * m2, MPI_DOUBLE, dst, 0, matrix2N, numberOfLinesSecondMatrix * m2, MPI_DOUBLE, src, 0, MPI_COMM_WORLD, &status);
        free(matrix2);
        matrix2 = matrix2N;
    }
    // Only the root keeps the reduced result, so no receive buffer is needed here.
    MPI_Reduce(res, NULL, n1 * m2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    free(matrix1);
    free(matrix2);
    free(res);
}
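
// Worked example of the ring above, assuming 4 processes (root 0 plus workers 1-3,
// adjusted ranks 0-2): worker 1 sends its matrix2 block to 2 and receives from 3,
// worker 2 sends to 3 and receives from 1, worker 3 wraps around and sends to 1
// while receiving from 2. The blocks therefore circulate 1 -> 2 -> 3 -> 1 once per
// iteration of the outer loop.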
// Blocking-send test. Note that MPI_Init happens here while MPI_Finalize happens in
// the asynchronous test below, so the two must be run in that order (as in test7Lab).
void testP2P2SynchronousMatrixMul(int &argc, char *argv[])
{
    int rank, numberOfProcesses;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numberOfProcesses);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    // std::cout << "Rank: " << rank << "\nNumber of processes: " << numberOfProcesses << std::endl;
    if (rank == 0)
    {
        processMatrixRoot(numberOfProcesses);
        testMul(); // single-process baseline for comparison
    }
    else
    {
        processMatrixSlave(numberOfProcesses, rank);
    }
    MPI_Barrier(MPI_COMM_WORLD);
}
// Non-blocking-send test. Relies on MPI already being initialised by the synchronous
// test and finalises MPI when it is done.
void testP2P2AsynchronousMatrixMul(int &argc, char *argv[])
{
    int rank, numberOfProcesses;
    MPI_Comm_size(MPI_COMM_WORLD, &numberOfProcesses);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    // std::cout << "Rank: " << rank << "\nNumber of processes: " << numberOfProcesses << std::endl;
    if (rank == 0)
    {
        processMatrixRootAsync(numberOfProcesses);
        // testMul();
    }
    else
    {
        processMatrixSlave(numberOfProcesses, rank);
    }
    MPI_Finalize();
}
void test7Lab(int argc, char *argv[])
{
    // The synchronous test initialises MPI, the asynchronous one finalises it.
    testP2P2SynchronousMatrixMul(argc, argv);
    testP2P2AsynchronousMatrixMul(argc, argv);
}

int main(int argc, char *argv[])
{
    test7Lab(argc, argv);
}
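
// Typical build and run commands, assuming an MPI distribution that ships the usual
// compiler wrapper and launcher (names and flags may differ on your system) and that
// the source file is saved as main.cpp:
//
//   mpic++ -O2 -o matmul main.cpp
//   mpirun -np 4 ./matmul
//
// At least 2 processes are required, since rank 0 only distributes work and reduces
// the result; the matrix dimensions are read from stdin when the program starts.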