Advertisement
V_oron

CUDA: test occupation #1

Sep 21st, 2012
158
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.46 KB | None | 0 0
  1. #include <stdio.h>
  2.  
  3. const int REPEAT_TEST            = 10;     // number of kernel launches timed for each (blocks, threads) pair
  4. const int ITERATIONs             = 0xFFF;  // exclusive upper bound of the per-thread loop in Kernel (j starts at 1)
  5. const int THREADS_PER_BLOCK_max  = 512;    // largest block size tested; also used to size the device buffer
  6. const int THREADS_PER_BLOCK_step = 32;     // first block size tested and the increment between tested sizes
  7. const int BLOCKS_max             = 60;     // largest grid size tested; also used to size the device buffer
  8.  
  9.  
  10. __global__ void Kernel(float *res)
  11. {
  12.     float a = threadIdx.x + blockIdx.x*blockDim.x;
  13.     for(int j = 1; j < ITERATIONs; ++j)
  14.     {
  15.         a += sinf(a*1.01 + 0.01) + cosf(a*0.99 - 0.01);
  16.         //++a;
  17.     }
  18. }
  19.  
  20.  
  21. int main(void)
  22. {
  23.     float *res_d;
  24.     float elapsedTime;
  25.    
  26.     cudaEvent_t start, stop;
  27.     cudaEventCreate(&start);
  28.     cudaEventCreate(&stop);
  29.    
  30.     FILE *fo = fopen("out.txt", "w");
  31.     fprintf(fo, "%i  %i  %i\n", BLOCKS_max, THREADS_PER_BLOCK_step, THREADS_PER_BLOCK_max);
  32.    
  33.     cudaMalloc(&res_d, BLOCKS_max*THREADS_PER_BLOCK_max*sizeof(float));
  34.     for(int nb = 1; nb <= BLOCKS_max; ++nb)
  35.     for(int nt = THREADS_PER_BLOCK_step; nt <= THREADS_PER_BLOCK_max; nt += THREADS_PER_BLOCK_step)
  36.     {
  37.         printf("BLOCKS = %3i, THREADS_PER_BLOCK = %3i:\n", nb, nt);
  38.        
  39.         elapsedTime = 0;
  40.         for(int r = 1; r <= REPEAT_TEST; ++r)
  41.         {
  42.             printf("  r = %i/%i\r", r, REPEAT_TEST);
  43.             fflush(stdout);
  44.             cudaEventRecord(start, 0);
  45.             Kernel <<<nb, nt>>> (res_d);
  46.             cudaEventRecord(stop, 0);
  47.             cudaEventSynchronize(stop);
  48.             cudaEventElapsedTime(&elapsedTime, start, stop);
  49.         }
  50.        
  51.         printf("  Elapsed Time: %20.14f\n", elapsedTime/REPEAT_TEST);
  52.         fprintf(fo, "%i  %i  %20.14f\n", nb, nt, elapsedTime/REPEAT_TEST);
  53.     }
  54.    
  55.     cudaFree(res_d);
  56.     fclose(fo);
  57.    
  58.     return 0;
  59. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement