Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat t21.cu
- #include <stdio.h>
- #include <time.h>
- #include <sys/time.h>
- #define DSIZE 32768
- #define NUM_COPY 10
- // Reports a failed CUDA runtime call on stderr.
- // Returns 1 when `err` is cudaSuccess, 0 otherwise (after printing `what` and the error string).
- static int cuda_ok(cudaError_t err, const char *what){
-     if (err == cudaSuccess) return 1;
-     fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
-     return 0;
- }
- // Microseconds elapsed from *start to *end.
- // The seconds are differenced before scaling so the intermediate stays small
- // regardless of the absolute wall-clock value.
- static long long elapsed_us(const struct timeval *start, const struct timeval *end){
-     return (long long)(end->tv_sec - start->tv_sec) * 1000000LL
-          + (long long)(end->tv_usec - start->tv_usec);
- }
- // Times how long the host spends issuing back-to-back cudaMemcpyAsync calls
- // (DSIZE ints, host to device) on one stream, for batches of 10, 100, ... calls,
- // and prints the average per-call time of each batch.
- // No synchronization is performed inside or between the timed batches, so the
- // figures cover only the time for the calls to return, and work queued by one
- // batch may still be pending when the next batch starts.
- // Returns 0 on success, 1 if any CUDA runtime call reports an error.
- int main(){
-     struct timeval t1, t2;
-     int *d_A = NULL, *h_A = NULL;
-     const size_t bytes = DSIZE*sizeof(int);
-     const int num_rounds = 5;          // batch sizes: NUM_COPY^1 .. NUM_COPY^num_rounds
-     int copy_count = NUM_COPY;
-     cudaStream_t my_stream;
-     if (!cuda_ok(cudaHostAlloc((void **)&h_A, bytes, cudaHostAllocDefault), "cudaHostAlloc")) return 1;
-     if (!cuda_ok(cudaMalloc((void **)&d_A, bytes), "cudaMalloc")) return 1;
-     if (!cuda_ok(cudaStreamCreate(&my_stream), "cudaStreamCreate")) return 1;
-     // One untimed copy before any measurement (the buffer contents are never read back).
-     if (!cuda_ok(cudaMemcpyAsync(d_A, h_A, bytes, cudaMemcpyHostToDevice, my_stream), "cudaMemcpyAsync")) return 1;
-     // Baseline: cost of two consecutive gettimeofday calls with nothing in between.
-     gettimeofday(&t1, NULL);
-     gettimeofday(&t2, NULL);
-     printf("null timing = %lldus\n", elapsed_us(&t1, &t2));
-     for (int i = 0; i < num_rounds; i++){
-         // Remember a failure instead of reporting it inside the timed region,
-         // so the measured loop contains no I/O.
-         cudaError_t issue_err = cudaSuccess;
-         gettimeofday(&t1, NULL);
-         for (int j = 0; j < copy_count; j++){
-             cudaError_t e = cudaMemcpyAsync(d_A, h_A, bytes, cudaMemcpyHostToDevice, my_stream);
-             if (e != cudaSuccess) issue_err = e;
-         }
-         gettimeofday(&t2, NULL);
-         if (!cuda_ok(issue_err, "cudaMemcpyAsync")) return 1;
-         printf("%d call timing = %fus average\n", copy_count, (float)elapsed_us(&t1, &t2)/(float)copy_count);
-         copy_count *= NUM_COPY;
-     }
-     // All measurements are printed; wait for the queued copies before releasing
-     // the buffers they read from and write to.
-     if (!cuda_ok(cudaStreamSynchronize(my_stream), "cudaStreamSynchronize")) return 1;
-     cudaStreamDestroy(my_stream);
-     cudaFree(d_A);
-     cudaFreeHost(h_A);
-     return 0;
- }
- $ nvcc -arch=sm_20 -o t21 t21.cu
- $ ./t21
- null timing = 1us
- 10 call timing = 2.800000us average
- 100 call timing = 2.410000us average
- 1000 call timing = 2.409000us average
- 10000 call timing = 26.297800us average
- 100000 call timing = 26.338289us average
- $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement