Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <time.h>
- #include <stdlib.h>
- #include "math.h"
// Earlier compile-time settings, kept for reference:
//#define N 200000000
//#define BLOCK_SIZE 1024

// Device properties record (not referenced by the code visible in this file).
struct cudaDeviceProp props;

// Number of float elements processed in one pass; main reassigns it per pass.
int N = 10000000;
// Threads per block used for the kernel launch.
int BLOCK_SIZE = 1024;

// Host buffer (allocated by prepare) and device buffer (allocated by prologue).
float *hArray = NULL;
float *dArray = NULL;

// Number of thread blocks in the launch grid; computed by main before each launch.
int blocks = 0;
// Host reference computation: rewrites each of the N elements of hArray
// in place as v*v*v + v*v + v.
void computeUsingCPU(){
    int idx = 0;
    while (idx < N) {
        const float v = hArray[idx];
        // Same evaluation order as a left-to-right v*v*v + v*v + v.
        hArray[idx] = v * v * v + v * v + v;
        ++idx;
    }
}
// Allocates the host buffer hArray for N floats and fills it with 1, 2, ..., N.
//
// A buffer left over from an earlier call is released first, so calling this
// repeatedly (as main does, once per problem size) does not leak memory.
// If N is not positive, no buffer is allocated and hArray is left NULL.
// If the allocation fails, an error is printed and the program exits.
void prepare(){
    // free(NULL) is a no-op, so this is safe on the very first call.
    free(hArray);
    hArray = NULL;

    if (N <= 0) {
        return;
    }

    const size_t bytes = sizeof(float) * (size_t)N;
    hArray = (float*) malloc(bytes);
    if (hArray == NULL) {
        fprintf(stderr, "prepare: cannot allocate %zu bytes of host memory\n", bytes);
        exit(EXIT_FAILURE);
    }

    // Every element is written here, so no separate zero-fill is needed.
    for (int i = 0; i < N; i++) {
        hArray[i] = (float)(i + 1);
    }
}
// Allocates the device buffer dArray for N floats and copies the contents of
// hArray into it.
//
// The byte count is N * sizeof(float); sizeof(hArray) would only be the size
// of the pointer itself. On any CUDA error a message is printed and the
// program exits.
void prologue(void) {
    const size_t bytes = sizeof(float) * (size_t)N;

    cudaError_t status = cudaMalloc((void**)&dArray, bytes);
    if (status != cudaSuccess) {
        fprintf(stderr, "prologue: cudaMalloc of %zu bytes failed: %s\n",
                bytes, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }

    status = cudaMemcpy(dArray, hArray, bytes, cudaMemcpyHostToDevice);
    if (status != cudaSuccess) {
        fprintf(stderr, "prologue: host-to-device copy failed: %s\n",
                cudaGetErrorString(status));
        cudaFree(dArray);
        dArray = NULL;
        exit(EXIT_FAILURE);
    }
}
// Copies N floats from the device buffer dArray back into hArray and releases
// the device buffer.
//
// The byte count is N * sizeof(float); sizeof(hArray) would only be the size
// of the pointer itself. The device buffer is freed even when the copy fails;
// on any CUDA error a message is printed and the program exits.
void epilogue(void) {
    const size_t bytes = sizeof(float) * (size_t)N;

    const cudaError_t copyStatus = cudaMemcpy(hArray, dArray, bytes, cudaMemcpyDeviceToHost);
    const cudaError_t freeStatus = cudaFree(dArray);
    dArray = NULL;  // avoid leaving a dangling device pointer behind

    if (copyStatus != cudaSuccess) {
        fprintf(stderr, "epilogue: device-to-host copy failed: %s\n",
                cudaGetErrorString(copyStatus));
        exit(EXIT_FAILURE);
    }
    if (freeStatus != cudaSuccess) {
        fprintf(stderr, "epilogue: cudaFree failed: %s\n",
                cudaGetErrorString(freeStatus));
        exit(EXIT_FAILURE);
    }
}
// Kernel: replaces each of the first N elements of A with a*a*a + a*a + a.
//
// Launch layout: 1D grid of 1D blocks. The body is a grid-stride loop, so the
// result is correct for any grid/block size, including grids that do not cover
// N elements. Indices are computed in size_t so the global index cannot wrap
// negative and slip past the bounds check.
// Uses no shared memory. A must point to at least N floats of device memory.
__global__ void pow3(float *A, int N) {
    const size_t count = (N > 0) ? (size_t)N : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride) {
        const float a = A[i];  // load once, reuse from a register
        A[i] = a * a * a + a * a + a;
    }
}
// Small timer built on clock(): stores two clock() samples and reports the
// span between them in seconds.
struct Stopwatch
{
    clock_t _start;  // sample recorded by start()
    clock_t _stop;   // sample recorded by stop()

    // Clears both samples to zero.
    void init()
    {
        _start = 0;
        _stop = 0;
    }

    // Records the current clock() value as the start sample.
    void start()
    {
        _start = clock();
    }

    // Records the current clock() value as the stop sample.
    void stop()
    {
        _stop = clock();
    }

    // Returns (stop - start) in seconds, i.e. the tick difference divided by
    // CLOCKS_PER_SEC. Both samples are converted to double before subtracting.
    double getValue()
    {
        const double beginTicks = (double)_start;
        const double endTicks = (double)_stop;
        const double elapsedTicks = endTicks - beginTicks;
        return elapsedTicks / (double)CLOCKS_PER_SEC;
    }
};
// Benchmark driver.
//
// For i = 1..50 it sets N = 1024 * i^5, fills the host array, and times four
// phases with clock()-based stopwatches: host-to-device copy, the pow3 kernel,
// device-to-host copy, and the host computation. One line per pass is written
// to GPUresult.txt:
//   N  copyTo  gpuCompute  copyFrom  (copyTo + gpuCompute + copyFrom)  cpuCompute
// and the four timings are also printed to stdout.
//
// The sweep stops early once 1024 * i^5 no longer fits in the int N.
// Returns 0 on success and 1 if no CUDA device is available, the result file
// cannot be opened, or the kernel fails.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    FILE *dFile = fopen("GPUresult.txt", "w");
    if (dFile == NULL)
    {
        perror("GPUresult.txt");
        return 1;
    }

    int devCnt = 0;
    const cudaError_t countStatus = cudaGetDeviceCount(&devCnt);
    if (countStatus != cudaSuccess || devCnt == 0)
    {
        // Not an errno-style failure, so print the message directly.
        fprintf(stderr, "No CUDA devices available -- exiting.\n");
        fclose(dFile);
        return 1;
    }

    // Plain stack objects: nothing to allocate, nothing to free.
    Stopwatch copyToGPUTime;
    Stopwatch computeDataUsingGPUTime;
    Stopwatch copyFromGPUTime;
    Stopwatch computeUsingCPUTime;
    copyToGPUTime.init();
    computeDataUsingGPUTime.init();
    copyFromGPUTime.init();
    computeUsingCPUTime.init();

    // Largest value an int can hold (INT_MAX), derived without <limits.h>.
    const long long maxElements = (long long)(~0u >> 1);
    int exitCode = 0;

    for(int i=1; i<=50; i++)
    {
        // 1024 * i^5 exceeds the int range from i = 19 on, so compute it in
        // 64 bits and stop the sweep instead of overflowing N.
        const long long elements = 1024LL * i * i * i * i * i;
        if(elements > maxElements)
        {
            fprintf(stderr, "Stopping at step %d: %lld elements do not fit in an int.\n", i, elements);
            break;
        }
        N = (int)elements;

        // GPU part ///////////////////////////////////////
        prepare();

        copyToGPUTime.start();
        prologue();
        copyToGPUTime.stop();

        // Ceiling division: enough blocks to cover all N elements.
        blocks = N / BLOCK_SIZE;
        if(N % BLOCK_SIZE)
            blocks++;

        computeDataUsingGPUTime.start();
        pow3<<<blocks, BLOCK_SIZE>>>(dArray, N);
        // Launch-configuration errors show up in cudaGetLastError(); errors
        // raised while the kernel runs show up at the synchronizing call.
        cudaError_t kernelStatus = cudaGetLastError();
        if(kernelStatus == cudaSuccess)
            kernelStatus = cudaDeviceSynchronize();
        computeDataUsingGPUTime.stop();

        if(kernelStatus != cudaSuccess)
        {
            fprintf(stderr, "pow3 kernel failed for N=%d: %s\n", N, cudaGetErrorString(kernelStatus));
            cudaFree(dArray);
            dArray = NULL;
            free(hArray);
            hArray = NULL;
            exitCode = 1;
            break;
        }

        copyFromGPUTime.start();
        epilogue();
        copyFromGPUTime.stop();

        // CPU part ///////////////////////////////////////
        // NOTE: hArray now holds the values copied back from the device, so
        // the CPU pass transforms data the GPU pass has already transformed.
        computeUsingCPUTime.start();
        computeUsingCPU();
        computeUsingCPUTime.stop();

        // Print ///////////////////////////////////////
        const double toGpu = copyToGPUTime.getValue();
        const double onGpu = computeDataUsingGPUTime.getValue();
        const double fromGpu = copyFromGPUTime.getValue();
        const double onCpu = computeUsingCPUTime.getValue();

        fprintf(dFile, "%d %.12f %.12f %.12f %.12f %.12f\n", N, toGpu, onGpu, fromGpu, toGpu + onGpu + fromGpu, onCpu);
        printf(" GPU - copy data to memory: %.12fs\n GPU - computing: %.12fs\n GPU - copy data from memory: %.12fs\n CPU - computing: %.12fs\n",
               toGpu, onGpu, fromGpu, onCpu);

        // Release this pass's host buffer so the sweep does not accumulate
        // one allocation per problem size.
        free(hArray);
        hArray = NULL;
    }

    fclose(dFile);
    return exitCode;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement