Untitled

#include <mpi.h>
#include <stdio.h>


extern void compDotProduct(int, int, double*, double*, double*);

void distribute(int my_rank, int p, int *rows, int n, double *A, double *b)

/*
  Distribute an m x n matrix A and and an n vector b on p processes.
  Input
     my_rank: rank of the current process
     p      : number of processes
     rows:    array of size p. p[i] contains the number of rows on process i
     A      : an array of size m*n = (sum_i rows[i])*n
     n      : number of columns in A
     b      : vector of size n

  Process 0 distributes the matrix A by rows, where process i (i=0,...,p) gets
  rows[i] rows in order, and broadcast the b vector.
*/
{
  int tag    = 0;
  int offset, dest;
//------
    int prev, next;
        MPI_Request req[2];
        MPI_Status stat[2];

        prev = my_rank - 1;
        next = my_rank + 1;

        if(my_rank == 0) prev = p - 1;
        if(my_rank == p - 1) next = 0;
//------
  MPI_Status status;

  if (my_rank ==0)
    {
      offset = 0;
      for (dest = 1; dest < p; dest++)
    {
      // offset of the begining of data that is sent to dest
      offset += rows[dest-1];
      MPI_Isend(A + offset*n, rows[dest]*n, MPI_DOUBLE, next, tag, MPI_COMM_WORLD, &req[0]);
    }
    }
  else
    MPI_Irecv(A, rows[my_rank]*n, MPI_DOUBLE, prev, tag, MPI_COMM_WORLD, &req[1]);

  // Broadcast b
  MPI_Bcast(b, n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
//-----
    MPI_Waitall(1, req, stat);
//-----
}


void collect(int my_rank, int p, int *rows, double *y)
{
  /*
     Process i contains rows[i] elements of vector y.
     Collect all the elements of y on process 0. The ordering of the blocks is 0,...,p-1

     Input
     my_rank     rank of current process
     p           number of processes
     rows        an array of size p, where rows[i] is the number of elements of y on process i

     Output
     y           an array of size m = sum_i rows[i]
  */
//------
        int prev, next;
    MPI_Request req[2];
        MPI_Status stat[2];

    prev = my_rank - 1;
    next = my_rank + 1;

    if(my_rank == 0) prev = p - 1;
    if(my_rank == p - 1) next = 0;
//------
  int tag=0, source, offset;
  MPI_Status status;

  if (my_rank !=0)
    MPI_Isend(y, rows[my_rank], MPI_DOUBLE, next, tag, MPI_COMM_WORLD, &req[0]);
  else
    {
      offset = 0;
      for (source = 1; source < p; source++)
    {
      // offset of the beginning of data that is received fro process source
      offset += rows[source-1];
      MPI_Irecv(y+offset, rows[source], MPI_DOUBLE, prev, tag, MPI_COMM_WORLD,  &req[1]);
    }
    }
//-----
    MPI_Waitall(1,req,stat);
//-----
}

void parallelMatrixVectorProduct(int my_rank, int p, int *rows, int n, double *A, double *b, double *y)
{
  // Distribute A and b
  distribute(my_rank, p, rows, n, A, b);

  // Compute product on process my_rank
  compDotProduct(rows[my_rank], n, A, b, y);

  // Collect the vector y
  collect(my_rank, p, rows, y);
}