#include #include #include #include #include "mpi.h" #define DEBUG 1 typedef unsigned char uint8_t; typedef signed int int32_t; typedef unsigned int uint32_t; typedef signed long long int int64_t; typedef unsigned long long int uint64_t; #define NCASES 5 uint32_t testCases[NCASES] = { 100, 200, 400, 800, 1600 }; #define RANK_MASTER 0 int32_t maxProcessors, rank; void quickSort(int32_t arr[], int32_t left, int32_t right) { int32_t i = left, j = right, tmp, pivot = arr[left]; while (i <= j) { while (arr[i] < pivot) i++; while (arr[j] > pivot) j--; if (i <= j) { tmp = arr[i]; arr[i] = arr[j]; arr[j] = tmp; i++; j--; } } if(left < j) quickSort(arr, left, j); if(i < right) quickSort(arr, i, right); } int32_t **splitArray(int32_t *array, int32_t size, int32_t *piecesToCut, int32_t *mod) { int32_t pieces = *piecesToCut; int32_t piecesPerChunk = size / pieces; int32_t i, **result, j, k = 0, numAllocations = pieces + 1; *mod = size - piecesPerChunk * pieces; if(*mod == 0) numAllocations--; *piecesToCut = numAllocations; result = (int32_t**) malloc( sizeof(int32_t*) * numAllocations); for(i = 0; i < numAllocations; i++) { if(i == numAllocations-1 && (*mod != 0)) { result[i] = (int32_t*) malloc( sizeof(int32_t) * (*mod)); for(j = 0; j < *mod; j++) result[i][j] = array[k++]; } else { result[i] = (int32_t*) malloc( sizeof(int32_t) * piecesPerChunk); for(j = 0; j < piecesPerChunk; j++) result[i][j] = array[k++]; } } return result; } int32_t *joinArrays(int32_t **arrays, int32_t size, int32_t piecesPerChunk, int32_t mod, int32_t modIndex) { int32_t sizeTemp = size; if(mod != 0) sizeTemp--; int32_t *result = (int32_t*) malloc( sizeof(int32_t) * (piecesPerChunk*sizeTemp + mod)), i, j, k = 0; for(i = 0; i < size; i++) { int32_t tempSize = piecesPerChunk; if(i == modIndex) tempSize = mod; for(j = 0; j < tempSize; j++) result[k++] = arrays[i][j]; } return result; } void swapArrays(int32_t **v, int32_t i) { int32_t *aux; aux = v[i]; v[i] = v[i+1]; v[i+1] = aux; } void sortArrays(int32_t **v, int32_t size, int32_t piecesPerChunck, int32_t mod, int32_t *mIndex) { int32_t i; uint8_t swept; int32_t sizeOriginal = size; int32_t modIndex = sizeOriginal-1; do { size--; swept = 0; for(i = 0; i < size; i++) { int32_t sizeTemp = piecesPerChunck; if(i+1 == modIndex && mod != 0) sizeTemp = mod; if(v[i][0] > v[i+1][sizeTemp-1]) { if(sizeTemp == mod) modIndex = i; swapArrays(v, i); swept = 1; } } } while(swept); if(mod == 0) modIndex = -1; *mIndex = modIndex; } void releaseArrays(int32_t **arrays, int32_t size) { int32_t i; for(i = 0; i < size; i++) free(arrays[i]); } int main(int argc, char* argv[]) { int32_t rc = MPI_Init(&argc, &argv); if (rc == MPI_SUCCESS) { MPI_Comm_size(MPI_COMM_WORLD, &maxProcessors); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if(rank == RANK_MASTER) { uint8_t i; int32_t *v, j; for(i = 0; i < NCASES; i++) { uint32_t numElements = testCases[i] * 1000, sizeTemp; int32_t mod, numArrays = maxProcessors-1, intPieces = maxProcessors-1; v = (int32_t*)malloc( sizeof(int32_t) * numElements ); for(j = 0; j < numElements; j++) v[j] = numElements - j; int32_t **arrays = splitArray(v, numElements, &numArrays, &mod); sizeTemp = (numElements / intPieces); int32_t nodeCalcMod = (mod == 0 ? -1 : maxProcessors-1); MPI_Bcast(&nodeCalcMod, 1, MPI_INT, RANK_MASTER, MPI_COMM_WORLD); for(j = 1; j <= numArrays; j++) { int32_t destination = j, tagModifier = 0; if(j == numArrays && mod != 0) { sizeTemp = mod; destination = j-1; tagModifier = 10; } if(DEBUG >= 3) printf("Sent to rank %d, array of size %d to process\n", destination, sizeTemp); MPI_Send(&tagModifier, 1, MPI_INT, destination, 1, MPI_COMM_WORLD); MPI_Send(&sizeTemp, 1, MPI_UNSIGNED, destination, 10+tagModifier, MPI_COMM_WORLD); MPI_Send(arrays[j-1], sizeTemp, MPI_INT, destination, 20+tagModifier, MPI_COMM_WORLD); } uint64_t processTime = 0, tempTime; for(j = 1; j <= numArrays; j++) { int32_t source = j, tagModifier = 0; MPI_Status status; if(j == numArrays && mod != 0) { source = j-1; tagModifier = 10; } MPI_Recv(&sizeTemp, 1, MPI_UNSIGNED, source, 1+tagModifier, MPI_COMM_WORLD, &status); MPI_Recv(arrays[j-1], sizeTemp, MPI_INT, source, 2+tagModifier, MPI_COMM_WORLD, &status); MPI_Recv(&tempTime, 1, MPI_UNSIGNED_LONG_LONG, source, 3+tagModifier, MPI_COMM_WORLD, &status); if(tempTime > processTime) processTime = tempTime; if(DEBUG >= 3) printf("Received sorted array from rank %d with %d elements\n", source, sizeTemp); } if(DEBUG >= 2) { printf("\n\n"); sizeTemp = (numElements / intPieces); for(j = 0; j < numArrays; j++) { int32_t end = sizeTemp-1; if(j == numArrays-1 && mod != 0) end = mod-1; printf("Array of index %d starts in %d and ends in %d, end is %d, mod %d, numarrays %d\n", j, arrays[j][0], arrays[j][end], end, mod, numArrays); } } int32_t modIndex; sortArrays(arrays, numArrays, (numElements / intPieces), mod, &modIndex); if(DEBUG >= 2) for(j = 0; j < numArrays; j++) { int32_t end = sizeTemp-1; if(j == modIndex) end = mod-1; printf("2 Array of index %d starts in %d and ends in %d, end is %d\n", j, arrays[j][0], arrays[j][end], end); } v = joinArrays(arrays, numArrays, floor(numElements / intPieces), mod, modIndex); releaseArrays(arrays, numArrays); if(DEBUG) printf("\nArray final, first %d, last %d\n", v[0], v[numElements-1]); if(DEBUG >= 4) for(j = 0; j < numElements; j++) printf("%d\n", v[j]); //uint32_t clocks = clock(); //quickSort(v, 0, numElements-1); //clocks = clock() - clocks; //printf("Vetor de %u elementos ordenado com quicksort sequencial em %u ciclos de clock\n", numElements, clocks); printf("Vetor de %u elementos ordenado com quicksort em paralelo (%d threads). \nDentre o tempo de processamento de cada node, o maior foi %llu, %.2f seconds.\n\n", numElements, maxProcessors-1, processTime, (float)processTime/CLOCKS_PER_SEC); free(v); free(arrays); } } else { uint32_t i; for(i = 0; i < NCASES; i++) { uint8_t left = 1; int32_t nodeCalcMod; MPI_Bcast(&nodeCalcMod, 1, MPI_INT, RANK_MASTER, MPI_COMM_WORLD); if(nodeCalcMod == rank) left++; while(left > 0) { uint32_t size; int32_t *array, tagModifier; MPI_Status status; MPI_Recv(&tagModifier, 1, MPI_INT, RANK_MASTER, 1, MPI_COMM_WORLD, &status); MPI_Recv(&size, 1, MPI_UNSIGNED, RANK_MASTER, 10+tagModifier, MPI_COMM_WORLD, &status); array = (int32_t*)malloc( sizeof(int32_t) * size); MPI_Recv(array, size, MPI_INT, RANK_MASTER, 20+tagModifier, MPI_COMM_WORLD, &status); if(DEBUG >= 3) printf("Received on rank %d, array of size %d; first %d, last %d, tagModifier %d\n", rank, size, array[0], array[size-1], tagModifier); uint64_t clocks = clock(); quickSort(array, 0, size-1); clocks = clock() - clocks; MPI_Send(&size, 1, MPI_UNSIGNED, RANK_MASTER, 1+tagModifier, MPI_COMM_WORLD); MPI_Send(array, size, MPI_INT, RANK_MASTER, 2+tagModifier, MPI_COMM_WORLD); MPI_Send(&clocks, 1, MPI_UNSIGNED_LONG_LONG, RANK_MASTER, 3+tagModifier, MPI_COMM_WORLD); if(DEBUG >= 3) printf("Sent from rank %d to master the array sorted\n", rank); left--; } } } MPI_Finalize(); } return 0; }