Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2017
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.47 KB | None | 0 0
  #include <limits.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <time.h>
  #include "math.h"
  5.  
  6.  
  7. //#define N 200000000
  8. //#define BLOCK_SIZE 1024
  9.  
  // File-level benchmark state shared by the host helpers and main().

  // Device properties record; nothing in the visible code reads or writes it.
  struct cudaDeviceProp props;

  // Number of float elements processed per run (main() overwrites it each step).
  int N = 10000000;

  // Threads per block used for the kernel launch.
  int BLOCK_SIZE = 1024;

  // Host-side data buffer, allocated by prepare().
  float *hArray = NULL;

  // Device-side data buffer, allocated by prologue() and released by epilogue().
  float *dArray = NULL;

  // Number of blocks in the launch grid, computed in main() from N and BLOCK_SIZE.
  int blocks = 0;
  17.  
  /*
   * CPU reference computation: replaces each of the first N elements of the
   * global host array hArray with v*v*v + v*v + v, where v is the element's
   * current value. Works in place; does nothing when N <= 0.
   */
  void computeUsingCPU(){
      int idx = 0;
      while (idx < N) {
          // Read the element once; the expression keeps the original
          // left-to-right evaluation order of the polynomial terms.
          const float v = hArray[idx];
          hArray[idx] = v * v * v + v * v + v;
          ++idx;
      }
  }
  23.  
  /*
   * (Re)creates the global host buffer hArray with N floats and fills it with
   * the sequence 1, 2, ..., N.
   *
   * Any buffer left over from a previous call is released first, so calling
   * this repeatedly does not leak. When N is 0 the buffer is left as NULL.
   * A negative N or a failed allocation is reported on stderr and terminates
   * the program, because every later stage would otherwise dereference an
   * invalid pointer.
   */
  void prepare(){
      // free(NULL) is a no-op, so this is safe on the very first call too.
      free(hArray);
      hArray = NULL;

      if (N < 0) {
          fprintf(stderr, "prepare: invalid element count %d\n", N);
          exit(EXIT_FAILURE);
      }
      if (N == 0) {
          return;
      }

      hArray = (float*) malloc(sizeof(float) * (size_t)N);
      if (hArray == NULL) {
          fprintf(stderr, "prepare: cannot allocate %d floats on the host\n", N);
          exit(EXIT_FAILURE);
      }

      // Every element is written here, so no separate zero-fill is needed.
      for (int i = 0; i < N; i++) {
          hArray[i] = (float)(i + 1);
      }
  }
  31.  
  /*
   * Allocates the global device buffer dArray for N floats and uploads the
   * contents of the host buffer hArray into it.
   *
   * The transfer size is N * sizeof(float); sizeof(hArray) would only be the
   * size of the pointer itself. Any CUDA failure is reported on stderr and
   * terminates the program.
   */
  void prologue(void) {
      const size_t bytes = sizeof(float) * (size_t)N;

      cudaError_t err = cudaMalloc((void**)&dArray, bytes);
      if (err != cudaSuccess) {
          fprintf(stderr, "prologue: cudaMalloc of %zu bytes failed: %s\n",
                  bytes, cudaGetErrorString(err));
          exit(EXIT_FAILURE);
      }

      err = cudaMemcpy(dArray, hArray, bytes, cudaMemcpyHostToDevice);
      if (err != cudaSuccess) {
          fprintf(stderr, "prologue: host-to-device copy of %zu bytes failed: %s\n",
                  bytes, cudaGetErrorString(err));
          cudaFree(dArray);
          exit(EXIT_FAILURE);
      }
  }

  /*
   * Downloads N floats from the device buffer dArray back into the host
   * buffer hArray and then releases the device buffer.
   *
   * A failed copy is reported on stderr and terminates the program; a failed
   * cudaFree is reported but not fatal, since the data has already been
   * retrieved. dArray is reset to NULL so a stale device pointer is not kept.
   */
  void epilogue(void) {
      const size_t bytes = sizeof(float) * (size_t)N;

      cudaError_t err = cudaMemcpy(hArray, dArray, bytes, cudaMemcpyDeviceToHost);
      if (err != cudaSuccess) {
          fprintf(stderr, "epilogue: device-to-host copy of %zu bytes failed: %s\n",
                  bytes, cudaGetErrorString(err));
          cudaFree(dArray);
          exit(EXIT_FAILURE);
      }

      err = cudaFree(dArray);
      if (err != cudaSuccess) {
          fprintf(stderr, "epilogue: cudaFree failed: %s\n", cudaGetErrorString(err));
      }
      dArray = NULL;
  }
  41.  
  42.  
  /*
   * Kernel: each thread replaces one element A[x] with v*v*v + v*v + v, where
   * v is the element's current value.
   *
   * Expects a 1-D launch; x is the flat index blockDim.x * blockIdx.x +
   * threadIdx.x, and threads with x >= N do nothing, so the grid may
   * overshoot N.
   *
   * A - device pointer to at least N floats, updated in place.
   * N - number of elements to process.
   */
  __global__ void pow3(float *A, int N) {
      // Compute the index in 64 bits: the 32-bit product narrowed to int can
      // wrap to a negative value in the grid tail when N is close to INT_MAX,
      // which would pass the bounds check and write out of range.
      const long long x = (long long)blockDim.x * blockIdx.x + threadIdx.x;

      if (x < N) {
          const float v = A[x];
          A[x] = v * v * v + v * v + v;
      }
  }
  50.  
  /*
   * Small stopwatch built on clock() from <time.h>.
   *
   * Usage: init() once, then start() / stop() around the code of interest and
   * getValue() to read the elapsed clock() time in seconds.
   */
  struct Stopwatch
  {
      clock_t _start;  // clock() reading taken by start()
      clock_t _stop;   // clock() reading taken by stop()

      // Resets both readings to zero.
      void init()
      {
          _stop = 0;
          _start = 0;
      }

      // Records the current clock() value as the start reading.
      void start(){
          _start = clock();
      }

      // Records the current clock() value as the stop reading.
      void stop(){
          _stop = clock();
      }

      // Returns (stop - start) converted to seconds via CLOCKS_PER_SEC.
      double getValue(){
          const double begin = (double)_start;
          const double end = (double)_stop;
          return (end - begin) / (double)CLOCKS_PER_SEC;
      }
  };
  74.  
  75.  
  /*
   * Benchmark driver.
   *
   * For i = 1, 2, ..., 50 the element count N is set to 1024 * i^5 and the
   * following stages are timed separately: upload to the GPU (prologue),
   * the pow3 kernel, download from the GPU (epilogue), and the CPU version
   * (computeUsingCPU). One line per step is appended to "GPUresult.txt" in
   * the form
   *     N copyTo compute copyFrom gpuTotal cpu
   * and a human-readable summary is printed to stdout.
   *
   * The sweep stops early when 1024 * i^5 no longer fits in the int N, or
   * when a CUDA call fails (for example when the device runs out of memory).
   *
   * Returns 0 on a complete sweep, 1 when the output file cannot be opened,
   * no CUDA device is available, or a CUDA error ended the sweep early.
   */
  int main(int argc, char** argv)
  {
      (void)argc;
      (void)argv;

      // The stopwatches are small value types, so automatic storage is enough
      // and avoids unchecked malloc/free pairs.
      struct Stopwatch copyToGPUTime;
      struct Stopwatch computeDataUsingGPUTime;
      struct Stopwatch copyFromGPUTime;
      struct Stopwatch computeUsingCPUTime;
      copyToGPUTime.init();
      computeDataUsingGPUTime.init();
      copyFromGPUTime.init();
      computeUsingCPUTime.init();

      FILE *dFile = fopen("GPUresult.txt", "w");
      if (dFile == NULL) {
          perror("GPUresult.txt");
          return 1;
      }

      int devCnt = 0;
      cudaError_t err = cudaGetDeviceCount(&devCnt);
      if (err != cudaSuccess || devCnt == 0)
      {
          // Not an errno condition, so perror() would append an unrelated message.
          fprintf(stderr, "No CUDA devices available -- exiting.\n");
          fclose(dFile);
          return 1;
      }

      int exitCode = 0;

      for(int i=1; i<=50; i++)
      {
          // 1024 * i^5 exceeds INT_MAX from i = 19 on; evaluate it in 64 bits
          // and stop before it would overflow the int element count.
          const long long elements = 1024LL * i * i * i * i * i;
          if (elements > INT_MAX) {
              fprintf(stderr, "Stopping at step %d: %lld elements exceed the supported range.\n",
                      i, elements);
              break;
          }
          N = (int)elements;

          // GPU part ///////////////////////////////////////

          prepare();

          copyToGPUTime.start();
          prologue();
          copyToGPUTime.stop();

          // prologue() returns nothing, so pick up any runtime error it left behind.
          err = cudaGetLastError();
          if (err != cudaSuccess) {
              fprintf(stderr, "Upload to the GPU failed for N=%d: %s\n",
                      N, cudaGetErrorString(err));
              exitCode = 1;
              break;
          }

          // Ceiling division without forming N + BLOCK_SIZE - 1, which could overflow.
          blocks = N / BLOCK_SIZE;
          if(N % BLOCK_SIZE)
              blocks++;

          computeDataUsingGPUTime.start();
          pow3<<<blocks, BLOCK_SIZE>>>(dArray, N);
          err = cudaGetLastError();            // launch-configuration errors
          if (err == cudaSuccess)
              err = cudaDeviceSynchronize();   // wait for the kernel; reports execution errors
          computeDataUsingGPUTime.stop();

          if (err != cudaSuccess) {
              fprintf(stderr, "Kernel pow3 failed for N=%d: %s\n",
                      N, cudaGetErrorString(err));
              cudaFree(dArray);
              exitCode = 1;
              break;
          }

          copyFromGPUTime.start();
          epilogue();
          copyFromGPUTime.stop();

          err = cudaGetLastError();
          if (err != cudaSuccess) {
              fprintf(stderr, "Download from the GPU failed for N=%d: %s\n",
                      N, cudaGetErrorString(err));
              exitCode = 1;
              break;
          }

          // CPU part ///////////////////////////////////////

          // Note: this runs on hArray as left by epilogue(), i.e. on the data
          // already transformed by the GPU stage.
          computeUsingCPUTime.start();
          computeUsingCPU();
          computeUsingCPUTime.stop();

          // Print ///////////////////////////////////////

          const double toGPU   = copyToGPUTime.getValue();
          const double onGPU   = computeDataUsingGPUTime.getValue();
          const double fromGPU = copyFromGPUTime.getValue();
          const double onCPU   = computeUsingCPUTime.getValue();

          // Write with a literal format string instead of passing a data buffer as the format.
          fprintf(dFile, "%d %.12f %.12f %.12f %.12f %.12f\n",
                  N, toGPU, onGPU, fromGPU, toGPU + onGPU + fromGPU, onCPU);
          printf(" GPU - copy data to memory: %.12fs\n GPU - computing: %.12fs\n GPU - copy data from memory: %.12fs\n CPU - computing: %.12fs\n",
                 toGPU, onGPU, fromGPU, onCPU);

          // Release this step's host buffer so it is not leaked when the
          // next step allocates a new one.
          free(hArray);
          hArray = NULL;
      }

      fclose(dFile);

      // Still set if the loop was left early after prepare().
      free(hArray);
      hArray = NULL;

      return exitCode;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement