Advertisement
Guest User

Untitled

a guest
Jul 24th, 2014
412
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.15 KB | None | 0 0
  1. $ cat t503.cu
  #include <errno.h>
  #include <limits.h>
  #include <stdio.h>
  #include <stdlib.h>

  #include <thrust/host_vector.h>
  #include <thrust/device_vector.h>
  #include <thrust/generate.h>
  #include <thrust/inner_product.h>
  #include <thrust/complex.h>
  #include <thrust/execution_policy.h>
  9.  
  // Evaluates a CUDA runtime call and terminates the program with a
  // file/line diagnostic if the call did not return cudaSuccess.
  #define CUDA_CHECK(call)                                                   \
      do {                                                                   \
          cudaError_t cuda_check_err_ = (call);                              \
          if (cuda_check_err_ != cudaSuccess) {                              \
              fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                      cudaGetErrorString(cuda_check_err_));                  \
              exit(EXIT_FAILURE);                                            \
          }                                                                  \
      } while (0)

  // Parses `text` as a base-10 integer in the range [1, INT_MAX].
  // Returns true and stores the value in *out on success; returns false for
  // empty or non-numeric text, trailing characters, or out-of-range values.
  static bool parsePositiveInt(const char *text, int *out)
  {
      char *end = NULL;
      errno = 0;
      const long value = strtol(text, &end, 10);
      if (end == text || *end != '\0' || errno == ERANGE ||
          value < 1 || value > INT_MAX)
      {
          return false;
      }
      *out = static_cast<int>(value);
      return true;
  }

  // Benchmark: times the host-to-device copy of two complex<float> vectors and
  // the average cost of thrust::inner_product on the device and on the host.
  //
  // Usage: <program> <vec_size> <iterations>
  //   vec_size   - number of complex<float> elements per vector (>= 1)
  //   iterations - how many times each inner product is repeated (>= 1)
  //
  // Returns 0 on success and EXIT_FAILURE on bad arguments; a failing CUDA
  // runtime call terminates the process through CUDA_CHECK.
  int main(int argc, char **argv)
  {
      if (argc < 3)
      {
          fprintf(stderr, "usage: %s <vec_size> <iterations>\n",
                  argc > 0 ? argv[0] : "t503");
          return EXIT_FAILURE;
      }

      int vec_size = 0;
      int iterations = 0;
      if (!parsePositiveInt(argv[1], &vec_size) ||
          !parsePositiveInt(argv[2], &iterations))
      {
          fprintf(stderr, "vec_size and iterations must be positive integers\n");
          return EXIT_FAILURE;
      }

      float milliseconds = 0.0f;

      cudaEvent_t start, stop;
      CUDA_CHECK(cudaEventCreate(&start));
      CUDA_CHECK(cudaEventCreate(&stop));

      // rand() returns int; each element becomes a complex value whose real
      // part is that integer and whose imaginary part is zero.
      thrust::host_vector< thrust::complex<float> > host_x( vec_size );
      thrust::generate(host_x.begin(), host_x.end(), rand);

      thrust::host_vector< thrust::complex<float> > host_y( vec_size );
      thrust::generate(host_y.begin(), host_y.end(), rand);

      // size_t arithmetic avoids int overflow for large vectors; %zu is the
      // matching printf conversion for size_t.
      printf("vector size = %zu bytes\n",
             static_cast<size_t>(vec_size) * sizeof(thrust::complex<float>));

      // --- host -> device copy -------------------------------------------
      CUDA_CHECK(cudaEventRecord(start));

      thrust::device_vector< thrust::complex<float> > device_x = host_x;
      thrust::device_vector< thrust::complex<float> > device_y = host_y;
      CUDA_CHECK(cudaEventRecord(stop));

      CUDA_CHECK(cudaEventSynchronize(stop));
      CUDA_CHECK(cudaEventElapsedTime(&milliseconds, start, stop));

      printf("copy (device)\t\t%f ms\n", milliseconds);

      const thrust::complex<float> zero(0.0f, 0.0f);

      // Each result is written to a volatile so the compiler has to keep the
      // computation being timed instead of discarding an unused value.
      volatile float sink = 0.0f;

      // --- inner product on the device -----------------------------------
      CUDA_CHECK(cudaEventRecord(start));

      for(int i = 0; i < iterations; ++i)
      {
          const thrust::complex<float> r =
              thrust::inner_product(thrust::cuda::par, device_x.begin(), device_x.end(),
                                    device_y.begin(), zero);
          sink = r.real();
      }

      CUDA_CHECK(cudaEventRecord(stop));

      CUDA_CHECK(cudaEventSynchronize(stop));
      CUDA_CHECK(cudaEventElapsedTime(&milliseconds, start, stop));

      printf("inner_product (device)\t%f ms\n", milliseconds/iterations);

      // --- inner product on the host -------------------------------------
      // The same event pair brackets the host loop, so this figure is the
      // elapsed time between the two event records.
      CUDA_CHECK(cudaEventRecord(start));

      for(int i = 0; i < iterations; ++i)
      {
          const thrust::complex<float> r =
              thrust::inner_product(thrust::host, host_x.begin(), host_x.end(),
                                    host_y.begin(), zero);
          sink = r.real();
      }

      CUDA_CHECK(cudaEventRecord(stop));

      CUDA_CHECK(cudaEventSynchronize(stop));
      CUDA_CHECK(cudaEventElapsedTime(&milliseconds, start, stop));

      printf("inner_product (host)\t%f ms\n", milliseconds/iterations);

      CUDA_CHECK(cudaEventDestroy(start));
      CUDA_CHECK(cudaEventDestroy(stop));

      return 0;
  }
  70. $ nvcc -O3 -arch=sm_20 -o t503 t503.cu
  71. $ ./t503 3100000 1000
  72. vector size = 24800000 bytes
  73. copy (device) 25.308224 ms
  74. inner_product (device) 2.992532 ms
  75. inner_product (host) 30.961256 ms
  76. $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement