#include <stdio.h>
#include <malloc.h>
#include <math.h>
#include <time.h>
#include "mpi.h"
#define DEBUG 1
typedef unsigned char uint8_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed long long int int64_t;
typedef unsigned long long int uint64_t;
#define NCASES 5
uint32_t testCases[NCASES] =
{
100,
200,
400,
800,
1600
};
#define RANK_MASTER 0
int32_t maxProcessors, rank;
void quickSort(int32_t arr[], int32_t left, int32_t right)
{
int32_t i = left, j = right, tmp, pivot = arr[left];
while (i <= j)
{
while (arr[i] < pivot) i++;
while (arr[j] > pivot) j--;
if (i <= j)
{
tmp = arr[i];
arr[i] = arr[j];
arr[j] = tmp;
i++;
j--;
}
}
if(left < j)
quickSort(arr, left, j);
if(i < right)
quickSort(arr, i, right);
}
int32_t **splitArray(int32_t *array, int32_t size, int32_t *piecesToCut, int32_t *mod)
{
int32_t pieces = *piecesToCut;
int32_t piecesPerChunk = size / pieces;
int32_t i, **result, j, k = 0, numAllocations = pieces + 1;
*mod = size - piecesPerChunk * pieces;
if(*mod == 0)
numAllocations--;
*piecesToCut = numAllocations;
result = (int32_t**) malloc( sizeof(int32_t*) * numAllocations);
for(i = 0; i < numAllocations; i++)
{
if(i == numAllocations-1 && (*mod != 0))
{
result[i] = (int32_t*) malloc( sizeof(int32_t) * (*mod));
for(j = 0; j < *mod; j++)
result[i][j] = array[k++];
}
else
{
result[i] = (int32_t*) malloc( sizeof(int32_t) * piecesPerChunk);
for(j = 0; j < piecesPerChunk; j++)
result[i][j] = array[k++];
}
}
return result;
}
int32_t *joinArrays(int32_t **arrays, int32_t size, int32_t piecesPerChunk, int32_t mod, int32_t modIndex)
{
int32_t sizeTemp = size;
if(mod != 0)
sizeTemp--;
int32_t *result = (int32_t*) malloc( sizeof(int32_t) * (piecesPerChunk*sizeTemp + mod)), i, j, k = 0;
for(i = 0; i < size; i++)
{
int32_t tempSize = piecesPerChunk;
if(i == modIndex)
tempSize = mod;
for(j = 0; j < tempSize; j++)
result[k++] = arrays[i][j];
}
return result;
}
void swapArrays(int32_t **v, int32_t i)
{
int32_t *aux;
aux = v[i];
v[i] = v[i+1];
v[i+1] = aux;
}
void sortArrays(int32_t **v, int32_t size, int32_t piecesPerChunck, int32_t mod, int32_t *mIndex)
{
int32_t i;
uint8_t swept;
int32_t sizeOriginal = size;
int32_t modIndex = sizeOriginal-1;
do
{
size--;
swept = 0;
for(i = 0; i < size; i++)
{
int32_t sizeTemp = piecesPerChunck;
if(i+1 == modIndex && mod != 0)
sizeTemp = mod;
if(v[i][0] > v[i+1][sizeTemp-1])
{
if(sizeTemp == mod)
modIndex = i;
swapArrays(v, i);
swept = 1;
}
}
} while(swept);
if(mod == 0)
modIndex = -1;
*mIndex = modIndex;
}
void releaseArrays(int32_t **arrays, int32_t size)
{
int32_t i;
for(i = 0; i < size; i++)
free(arrays[i]);
}
int main(int argc, char* argv[])
{
int32_t rc = MPI_Init(&argc, &argv);
if (rc == MPI_SUCCESS)
{
MPI_Comm_size(MPI_COMM_WORLD, &maxProcessors);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if(rank == RANK_MASTER)
{
uint8_t i;
int32_t *v, j;
for(i = 0; i < NCASES; i++)
{
uint32_t numElements = testCases[i] * 1000, sizeTemp;
int32_t mod, numArrays = maxProcessors-1, intPieces = maxProcessors-1;
v = (int32_t*)malloc( sizeof(int32_t) * numElements );
for(j = 0; j < numElements; j++)
v[j] = numElements - j;
int32_t **arrays = splitArray(v, numElements, &numArrays, &mod);
sizeTemp = (numElements / intPieces);
int32_t nodeCalcMod = (mod == 0 ? -1 : maxProcessors-1);
MPI_Bcast(&nodeCalcMod, 1, MPI_INT, RANK_MASTER, MPI_COMM_WORLD);
for(j = 1; j <= numArrays; j++)
{
int32_t destination = j, tagModifier = 0;
if(j == numArrays && mod != 0)
{
sizeTemp = mod;
destination = j-1;
tagModifier = 10;
}
if(DEBUG >= 3)
printf("Sent to rank %d, array of size %d to process\n", destination, sizeTemp);
MPI_Send(&tagModifier, 1, MPI_INT, destination, 1, MPI_COMM_WORLD);
MPI_Send(&sizeTemp, 1, MPI_UNSIGNED, destination, 10+tagModifier, MPI_COMM_WORLD);
MPI_Send(arrays[j-1], sizeTemp, MPI_INT, destination, 20+tagModifier, MPI_COMM_WORLD);
}
uint64_t processTime = 0, tempTime;
for(j = 1; j <= numArrays; j++)
{
int32_t source = j, tagModifier = 0;
MPI_Status status;
if(j == numArrays && mod != 0)
{
source = j-1;
tagModifier = 10;
}
MPI_Recv(&sizeTemp, 1, MPI_UNSIGNED, source, 1+tagModifier, MPI_COMM_WORLD, &status);
MPI_Recv(arrays[j-1], sizeTemp, MPI_INT, source, 2+tagModifier, MPI_COMM_WORLD, &status);
MPI_Recv(&tempTime, 1, MPI_UNSIGNED_LONG_LONG, source, 3+tagModifier, MPI_COMM_WORLD, &status);
if(tempTime > processTime)
processTime = tempTime;
if(DEBUG >= 3)
printf("Received sorted array from rank %d with %d elements\n", source, sizeTemp);
}
if(DEBUG >= 2)
{
printf("\n\n");
sizeTemp = (numElements / intPieces);
for(j = 0; j < numArrays; j++)
{
int32_t end = sizeTemp-1;
if(j == numArrays-1 && mod != 0)
end = mod-1;
printf("Array of index %d starts in %d and ends in %d, end is %d, mod %d, numarrays %d\n", j, arrays[j][0], arrays[j][end], end, mod, numArrays);
}
}
int32_t modIndex;
sortArrays(arrays, numArrays, (numElements / intPieces), mod, &modIndex);
if(DEBUG >= 2)
for(j = 0; j < numArrays; j++)
{
int32_t end = sizeTemp-1;
if(j == modIndex)
end = mod-1;
printf("2 Array of index %d starts in %d and ends in %d, end is %d\n", j, arrays[j][0], arrays[j][end], end);
}
v = joinArrays(arrays, numArrays, floor(numElements / intPieces), mod, modIndex);
releaseArrays(arrays, numArrays);
if(DEBUG)
printf("\nArray final, first %d, last %d\n", v[0], v[numElements-1]);
if(DEBUG >= 4)
for(j = 0; j < numElements; j++)
printf("%d\n", v[j]);
//uint32_t clocks = clock();
//quickSort(v, 0, numElements-1);
//clocks = clock() - clocks;
//printf("Vetor de %u elementos ordenado com quicksort sequencial em %u ciclos de clock\n", numElements, clocks);
printf("Vetor de %u elementos ordenado com quicksort em paralelo (%d threads). \nDentre o tempo de processamento de cada node, o maior foi %llu, %.2f seconds.\n\n", numElements, maxProcessors-1, processTime, (float)processTime/CLOCKS_PER_SEC);
free(v);
free(arrays);
}
}
else
{
uint32_t i;
for(i = 0; i < NCASES; i++)
{
uint8_t left = 1;
int32_t nodeCalcMod;
MPI_Bcast(&nodeCalcMod, 1, MPI_INT, RANK_MASTER, MPI_COMM_WORLD);
if(nodeCalcMod == rank)
left++;
while(left > 0)
{
uint32_t size;
int32_t *array, tagModifier;
MPI_Status status;
MPI_Recv(&tagModifier, 1, MPI_INT, RANK_MASTER, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&size, 1, MPI_UNSIGNED, RANK_MASTER, 10+tagModifier, MPI_COMM_WORLD, &status);
array = (int32_t*)malloc( sizeof(int32_t) * size);
MPI_Recv(array, size, MPI_INT, RANK_MASTER, 20+tagModifier, MPI_COMM_WORLD, &status);
if(DEBUG >= 3)
printf("Received on rank %d, array of size %d; first %d, last %d, tagModifier %d\n", rank, size, array[0], array[size-1], tagModifier);
uint64_t clocks = clock();
quickSort(array, 0, size-1);
clocks = clock() - clocks;
MPI_Send(&size, 1, MPI_UNSIGNED, RANK_MASTER, 1+tagModifier, MPI_COMM_WORLD);
MPI_Send(array, size, MPI_INT, RANK_MASTER, 2+tagModifier, MPI_COMM_WORLD);
MPI_Send(&clocks, 1, MPI_UNSIGNED_LONG_LONG, RANK_MASTER, 3+tagModifier, MPI_COMM_WORLD);
if(DEBUG >= 3)
printf("Sent from rank %d to master the array sorted\n", rank);
left--;
}
}
}
MPI_Finalize();
}
return 0;
}