Advertisement
Guest User

Untitled

a guest
Aug 21st, 2019
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.19 KB | None | 0 0
  1. $ cat thrust_async.cu
  2.  
  3. #include <iostream>
  4. #include <chrono>
  5. #include <thrust/random.h>
  6. #include <thrust/device_vector.h>
  7. #include <thrust/transform.h>
  8. #include <thrust/iterator/counting_iterator.h>
  9. #include <thrust/iterator/constant_iterator.h>
  10.  
  /**
   * Minimal stopwatch reporting elapsed time in seconds as a double.
   *
   * The start point is captured on construction and can be re-armed with
   * reset(). elapsed() may be called any number of times; it never modifies
   * the start point.
   */
  class Timer
  {
    // steady_clock is monotonic, so an interval can never come out negative
    // or skewed because the system clock was adjusted while measuring.
    typedef std::chrono::steady_clock clock_;
    // Floating-point seconds (ratio<1> == one tick per second).
    typedef std::chrono::duration<double, std::ratio<1> > second_;

  public:
    /** Starts timing immediately. */
    Timer() : beg_(clock_::now()) {}

    /** Restarts the measurement from the current instant. */
    void reset() { beg_ = clock_::now(); }

    /** Returns the seconds elapsed since construction or the last reset(). */
    double elapsed() const
    {
      return std::chrono::duration_cast<second_>(clock_::now() - beg_).count();
    }

  private:
    clock_::time_point beg_;  // instant the current measurement started
  };
  25.  
  // Functor that maps an index n to a pseudo-random float drawn from a
  // uniform distribution parameterised by the bounds (a, b).
  //
  // Each call builds a fresh default-constructed engine and discards n values
  // from it before drawing, so the result depends only on a, b and n; no
  // state is carried over between calls.
  struct prg
  {
    float a, b;  // distribution bounds passed to uniform_real_distribution

    // Bounds default to (0, 1).
    __host__ __device__
    prg(float lo = 0.f, float hi = 1.f)
      : a(lo), b(hi)
    {}

    // Returns the sample obtained after advancing the engine by n steps.
    __host__ __device__
    float operator()(const unsigned int n) const
    {
      thrust::default_random_engine engine;
      engine.discard(n);

      thrust::uniform_real_distribution<float> uniform(a, b);
      return uniform(engine);
    }
  };
  43.  
  44.  
  // Experiment driver: fills a device vector with pseudo-random floats, then
  // measures on the host clock (a) how long the thrust::reduce_by_key call
  // takes to return and (b) how long a following cudaDeviceSynchronize()
  // takes. Returns 0 on success, 1 if a checked CUDA runtime call fails.
  int main(void)
  {
    const int N = 1 << 20;

    // Reports a failed CUDA runtime call on stderr; returns true on success.
    const auto succeeded = [](cudaError_t status, const char* call) -> bool {
      if (status == cudaSuccess) return true;
      std::cerr << call << " failed: " << cudaGetErrorString(status) << std::endl;
      return false;
    };

    {
      // Inner scope: the device_vectors are destroyed at its closing brace,
      // i.e. before cudaDeviceReset() is called below.
      thrust::device_vector<float> numbers(N);
      thrust::device_vector<float> results(N);

      thrust::counting_iterator<unsigned int> index_sequence_begin(0);
      // Every element gets key 0, so the whole input forms a single segment.
      thrust::constant_iterator<unsigned int> keys_in(0);
      thrust::device_vector<unsigned int> keys_out(N);

      Timer timer;

      thrust::transform(index_sequence_begin,
                        index_sequence_begin + N,
                        numbers.begin(),
                        prg(1.f, 2.f));

      // Drain any outstanding work from the transform before sampling t1, so
      // that none of it is attributed to reduce_by_key.
      if (!succeeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) return 1;

      const double t1 = timer.elapsed();

      // The returned end-iterator pair is not needed for the timing.
      thrust::reduce_by_key(keys_in, keys_in + N, numbers.begin(),
                            keys_out.begin(), results.begin());

      const double t2 = timer.elapsed();
      // If this takes a noticeable time, reduce_by_key returned before its
      // device work had finished.
      if (!succeeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) return 1;
      const double t3 = timer.elapsed();

      std::cout << "thrust::reduce_by_key took " << 1e3*(t2-t1) << "ms" << std::endl;
      std::cout << "cudaDeviceSynchronize took " << 1e3*(t3-t2) << "ms" << std::endl;
    }

    if (!succeeded(cudaDeviceReset(), "cudaDeviceReset")) return 1;

    return 0;
  }
  81.  
  82. $ nvcc -std=c++11 -arch=sm_52 -o thrust_async thrust_async.cu
  83.  
  84. $ ./thrust_async
  85. thrust::reduce_by_key took 1.64952ms
  86. cudaDeviceSynchronize took 0.004666ms
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement