Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>
// ---- Benchmark configuration ----------------------------------------------

// How many times each launch configuration is timed.
const int REPEAT_TEST = 10;

// Upper bound of the per-thread work loop inside Kernel.
const int ITERATIONs = 0xFFF;

// Threads-per-block sweep: starts at the step value and grows by the step
// up to and including the maximum.
const int THREADS_PER_BLOCK_max  = 512;
const int THREADS_PER_BLOCK_step = 32;

// Largest number of blocks launched; the sweep starts at one block.
const int BLOCKS_max = 60;
/*
 * Benchmark kernel: each thread runs a chain of dependent sinf/cosf
 * evaluations seeded with its global thread index and stores the final value.
 *
 * res: device buffer holding at least gridDim.x * blockDim.x floats; the
 *      element at the thread's global index receives the result. A null
 *      pointer skips the store.
 *
 * Launch layout: 1D grid of 1D blocks (only the .x components are used).
 */
__global__ void Kernel(float *res)
{
    const unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x;
    float a = idx;
    for (int j = 1; j < ITERATIONs; ++j)
    {
        // Float-suffixed literals keep the whole expression in single
        // precision; the unsuffixed literals promoted it to double.
        a += sinf(a * 1.01f + 0.01f) + cosf(a * 0.99f - 0.01f);
    }
    // Store the result so the loop has an observable effect; without a store
    // the compiler is free to discard the computation being timed.
    if (res)
        res[idx] = a;
}
// Prints a diagnostic naming the failed step and terminates the process when
// a CUDA runtime call did not return cudaSuccess.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Times Kernel for every (blocks, threads-per-block) combination in the
 * configured sweep and reports the mean time over REPEAT_TEST launches.
 *
 * Output:
 *   - stdout: progress and the mean elapsed time per configuration.
 *   - out.txt: a header line "BLOCKS_max THREADS_PER_BLOCK_step
 *     THREADS_PER_BLOCK_max", then one "blocks threads mean_time" line per
 *     configuration. Times are whatever cudaEventElapsedTime reports
 *     (milliseconds per the CUDA runtime documentation).
 *
 * Returns 0 on success, 1 if out.txt cannot be opened; any CUDA failure
 * terminates the process with EXIT_FAILURE.
 */
int main(void)
{
    float *res_d = NULL;
    cudaEvent_t start, stop;

    FILE *fo = fopen("out.txt", "w");
    if (fo == NULL)
    {
        perror("out.txt");
        return 1;
    }

    checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    fprintf(fo, "%i %i %i\n", BLOCKS_max, THREADS_PER_BLOCK_step, THREADS_PER_BLOCK_max);

    // Sized for the largest launch in the sweep so every configuration fits.
    checkCuda(cudaMalloc(&res_d, BLOCKS_max * THREADS_PER_BLOCK_max * sizeof(float)),
              "cudaMalloc");

    for (int nb = 1; nb <= BLOCKS_max; ++nb)
    {
        for (int nt = THREADS_PER_BLOCK_step; nt <= THREADS_PER_BLOCK_max; nt += THREADS_PER_BLOCK_step)
        {
            printf("BLOCKS = %3i, THREADS_PER_BLOCK = %3i:\n", nb, nt);

            // Sum of all repetitions; previously each run overwrote the last,
            // so the reported "average" was only the final run divided by
            // REPEAT_TEST.
            float totalTime = 0.0f;
            for (int r = 1; r <= REPEAT_TEST; ++r)
            {
                float elapsedTime = 0.0f;

                printf(" r = %i/%i\r", r, REPEAT_TEST);
                fflush(stdout);

                checkCuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");
                Kernel <<<nb, nt>>> (res_d);
                checkCuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
                // Launch errors are queried only after the stop event is
                // enqueued, keeping the check out of the timed interval.
                checkCuda(cudaGetLastError(), "Kernel launch");
                checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
                checkCuda(cudaEventElapsedTime(&elapsedTime, start, stop),
                          "cudaEventElapsedTime");

                totalTime += elapsedTime;
            }

            const float meanTime = totalTime / REPEAT_TEST;
            printf(" Elapsed Time: %20.14f\n", meanTime);
            fprintf(fo, "%i %i %20.14f\n", nb, nt, meanTime);
        }
    }

    checkCuda(cudaFree(res_d), "cudaFree");
    checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    fclose(fo);
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement