/*******************************************************************************
*** NAME : Tyler McEntee ***
*** CLASS : CSC 346 ***
*** ASSIGNMENT : Assignment 8 ***
*** DUE DATE : 5/2/12 ***
*** INSTRUCTOR : GAMRADT ***
*******************************************************************************
*** DESCRIPTION :
*** nvcc –Xcompiler –fopenmp –lgomp –arch sm_13 mcenteet8.cu ***
******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
void intro(void);
void fillZeroes(double *, int);
void getParamaters(char *[], int *, int *, long int *);
double reduceArray(double *, int);
void displayResults(int, long int, double, double);
__device__ void f(double, double *);
__global__ void calcTrapezoid(double *, long int, double, int);
int main(int argc, char *argv[]) {
int blocks, threadsPerBlock, totalThreads, arraySize;
long int totalParts, partsPerThread;
double start, end;
double piEst, width, *hostPi, *devicePi, a = 0.0, b = 1.0;
intro();
/*getParamaters(argv, &blocks, &threadsPerBlock, &totalParts);*/
blocks = 16;
threadsPerBlock = 512;
totalParts = 5242880000;
start = omp_get_wtime();
totalThreads = blocks * threadsPerBlock;
partsPerThread = totalParts / totalThreads;
width = (b - a) / totalParts;
arraySize = totalThreads * sizeof(double);
printf("width = %.16f\n", width);
printf("totalParts = %ld\n", totalParts);
printf("arraySize = %d", arraySize);
hostPi = (double *)malloc(arraySize);
if (hostPi == NULL) {
printf("malloc failed\n");
}
else {
if (cudaMalloc((void**)&devicePi, arraySize) != cudaSuccess) {
printf("cudaMalloc failed\n");
}
else {
printf("2.\n");
fillZeroes(hostPi, threadsPerBlock);
cudaMemcpy(devicePi, hostPi, arraySize, cudaMemcpyHostToDevice);
calcTrapezoid<<<blocks, threadsPerBlock>>>(devicePi, totalParts, width, totalThreads);
cudaMemcpy(hostPi, devicePi, arraySize, cudaMemcpyDeviceToHost);
printf("3.\n");
piEst = reduceArray(hostPi, arraySize);
printf("PiEst = %f\n", piEst);
end = omp_get_wtime();
displayResults(totalThreads, totalParts, piEst, (end - start));
cudaFree(devicePi);
}
free(hostPi);
}
return 0;
}
/*******************************************************************************
*** FUNCTION Intro ***
*******************************************************************************
*** DESCRIPTION : Introduction to the program. ***
*** INPUT ARGS : none ***
*** OUTPUT ARGS : none ***
*** IN/OUT ARGS : none ***
*** RETURN : none ***
******************************************************************************/
void intro( void ) {
printf("Change\n");
printf("this\n");
printf("later\n");
printf("do it.\n");
printf("\n");
printf("\n\n");
return;
}
/*******************************************************************************
*** FUNCTION getParameters ***
*******************************************************************************
*** DESCRIPTION : Gets the filename from the user or commandline arg ***
*** INPUT ARGS : char **argv, int *parameters ***
*** OUTPUT ARGS : none ***
*** IN/OUT ARGS : none ***
*** RETURN : none ***
******************************************************************************/
void getParamaters(char **argv, int *blocks, int *threadsPerBlock, long int *partitions) {
*blocks = atoi(argv[1]);
*threadsPerBlock = atoi(argv[2]);
*partitions = atol(argv[3]);
return;
}
/*******************************************************************************
*** FUNCTION fillZereos ***
*******************************************************************************
*** DESCRIPTION : Gets the filename from the user or commandline arg ***
*** INPUT ARGS : char **argv, int *parameters ***
*** OUTPUT ARGS : none ***
*** IN/OUT ARGS : none ***
*** RETURN : none ***
******************************************************************************/
void fillZeroes(double *array, int size) {
int i;
#pragma omp parallel for default(none) private(i) shared(size, array)
for (i = 0; i < size; i++) {
array[i] = 0;
}
return;
}
/*******************************************************************************
*** FUNCTION f ***
*******************************************************************************
*** DESCRIPTION : calcualtes the area of a given trapezoid ***
*** INPUT ARGS : double x ***
*** OUTPUT ARGS : none ***
*** IN/OUT ARGS : none ***
*** RETURN : 4.0 / (1 + x * x) ***
******************************************************************************/
__device__ void f(double x, double *xCalc) {
*xCalc= 4.0 / (1.0 + x * x);
}
/****************************************************************************
*****************************************************************************
Function: calcTrapezoid
*****************************************************************************
Description: Outputs an array containing a complete list of partial sums
for pi.
Input args: totalParts, width, totalThreads
Output args: sum
In/Out args: none
Return: void
*****************************************************************************
****************************************************************************/
__global__ void calcTrapezoid(double *sum, long int totalParts, double width, int totalThreads) {
double approx;
int i, index = blockIdx.x * blockDim.x + threadIdx.x;
for (i = index; i < totalParts; i += totalThreads) {
f(i * width, &approx);
sum[i] += approx;
}
}
/****************************************************************************
*****************************************************************************
Function: reduceArray
*****************************************************************************
Description: Returns the sum of all the elements of a specified array using
an OpenMP parallel for reduction statement.
Input args: array, size
Output args: none
In/Out args: none
Return: sum
*****************************************************************************
****************************************************************************/
double reduceArray(double array[], int size) {
int i;
double sum = 0.0;
#pragma omp parallel for default(none) reduction(+: sum) private(i) shared(size, array)
for (i = 0; i < size; i++)
sum += array[i];
return sum;
}
/****************************************************************************
*****************************************************************************
Function: displayResults
*****************************************************************************
Description: Displays the thread count, paritition count, approximation for
pi, and the overall time taken to perform the calculation.
Input args: threads, partitions, result, time
Output args: none
In/Out args: none
Return: void
*****************************************************************************
****************************************************************************/
void displayResults(int threads, long int partitions, double result, double time) {
printf("\nThread count: %-10d\n", threads);
printf("Partition count: %-10ld\n", partitions);
printf("Approximate value of integral: %.17f\n", result);
printf("Overall time: %.17f\n\n", time);
return;
}