Advertisement
Guest User

Untitled

a guest
Dec 26th, 2013
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.38 KB | None | 0 0
  1. $ cat t21.cu
  2. #include <stdio.h>
  3. #include <time.h>
  4. #include <sys/time.h>
  5. #define DSIZE 32768
  6. #define NUM_COPY 10
  /*
   * Report a failed CUDA runtime call on stderr.
   *
   * status: value returned by the runtime call.
   * what:   short label for the call, used in the diagnostic.
   * Returns true when status is cudaSuccess, false otherwise.
   */
  static bool cuda_ok(cudaError_t status, const char *what){
    if (status == cudaSuccess)
      return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
  }

  /*
   * Microseconds elapsed between two gettimeofday() samples.
   *
   * The seconds are subtracted before being scaled, so the result does not
   * depend on how wide time_t is or on products of absolute timestamps.
   */
  static long long elapsed_us(const struct timeval *start, const struct timeval *stop){
    return ((long long)stop->tv_sec - (long long)start->tv_sec) * 1000000LL
         + ((long long)stop->tv_usec - (long long)start->tv_usec);
  }

  /*
   * Time how long the host spends issuing batches of cudaMemcpyAsync calls.
   *
   * Five batches are issued on `stream`, starting at NUM_COPY calls and growing
   * by a factor of NUM_COPY each round. Only the host-side cost of making the
   * calls is measured: nothing inside the timed region waits for the copies to
   * finish, and the stream is not drained between rounds.
   *
   * d_A:    device destination buffer of at least `bytes` bytes.
   * h_A:    host source buffer of at least `bytes` bytes.
   * bytes:  size of each copy.
   * stream: stream the copies are issued on.
   * Returns 0 on success, 1 if any copy call reported an error.
   */
  static int time_async_copies(int *d_A, const int *h_A, size_t bytes, cudaStream_t stream){
    struct timeval t1, t2;
    int copy_count = NUM_COPY;

    /* One untimed copy before measuring (presumably to absorb one-time setup cost). */
    if (!cuda_ok(cudaMemcpyAsync(d_A, h_A, bytes, cudaMemcpyHostToDevice, stream),
                 "cudaMemcpyAsync (initial)"))
      return 1;

    /* Baseline: two back-to-back clock reads show the cost of the timer itself. */
    gettimeofday(&t1, NULL);
    gettimeofday(&t2, NULL);
    printf("null timing = %lldus\n", elapsed_us(&t1, &t2));

    for (int i = 0; i < 5; i++){
      cudaError_t status = cudaSuccess;

      gettimeofday(&t1, NULL);
      /* Stop issuing as soon as a call fails; the failure is reported after the
         second clock read so no I/O lands inside the timed region. */
      for (int j = 0; j < copy_count && status == cudaSuccess; j++)
        status = cudaMemcpyAsync(d_A, h_A, bytes, cudaMemcpyHostToDevice, stream);
      gettimeofday(&t2, NULL);

      if (!cuda_ok(status, "cudaMemcpyAsync"))
        return 1;

      printf("%d call timing = %fus average\n", copy_count,
             (float)elapsed_us(&t1, &t2)/(float)copy_count);
      copy_count *= NUM_COPY;
    }
    return 0;
  }

  /*
   * Allocate a pinned host buffer, a device buffer and a stream, run the
   * cudaMemcpyAsync issue-rate measurement, then wait for outstanding copies
   * and release everything that was acquired.
   *
   * Returns 0 on success and 1 if any CUDA runtime call failed.
   */
  int main(){
    const size_t bytes = DSIZE*sizeof(int);
    int *d_A = NULL, *h_A = NULL;
    cudaStream_t my_stream;
    int rc = 1;

    /* Each resource is released in the same scope that acquired it, so a
       failure part-way through still frees whatever was already obtained. */
    if (cuda_ok(cudaHostAlloc((void **)&h_A, bytes, cudaHostAllocDefault), "cudaHostAlloc")){
      if (cuda_ok(cudaMalloc((void **)&d_A, bytes), "cudaMalloc")){
        if (cuda_ok(cudaStreamCreate(&my_stream), "cudaStreamCreate")){
          rc = time_async_copies(d_A, h_A, bytes, my_stream);

          /* Wait for queued copies before the buffers they use are freed; this
             also surfaces any error raised while the copies executed. */
          if (!cuda_ok(cudaStreamSynchronize(my_stream), "cudaStreamSynchronize"))
            rc = 1;
          if (!cuda_ok(cudaStreamDestroy(my_stream), "cudaStreamDestroy"))
            rc = 1;
        }
        if (!cuda_ok(cudaFree(d_A), "cudaFree"))
          rc = 1;
      }
      if (!cuda_ok(cudaFreeHost(h_A), "cudaFreeHost"))
        rc = 1;
    }
    return rc;
  }
  32.  
  33.  
  34. $ nvcc -arch=sm_20 -o t21 t21.cu
  35. $ ./t21
  36. null timing = 1us
  37. 10 call timing = 2.800000us average
  38. 100 call timing = 2.410000us average
  39. 1000 call timing = 2.409000us average
  40. 10000 call timing = 26.297800us average
  41. 100000 call timing = 26.338289us average
  42. $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement