Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat thrust_async.cu
#include <chrono>
#include <iostream>

#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/random.h>
#include <thrust/reduce.h>
#include <thrust/transform.h>
/**
 * Minimal host-side wall-clock stopwatch.
 *
 * The stopwatch starts when the object is constructed and can be restarted
 * with reset(). elapsed() reports the time since the most recent start.
 *
 * Readings come from std::chrono::steady_clock, which the standard guarantees
 * to be monotonic. std::chrono::high_resolution_clock carries no such
 * guarantee (it may be an alias of the adjustable system clock), so a
 * wall-clock adjustment could otherwise produce negative or skewed intervals.
 */
class Timer
{
public:
    /** Starts the stopwatch at the moment of construction. */
    Timer() : beg_(clock_::now()) {}

    /** Restarts the stopwatch from the current instant. */
    void reset() { beg_ = clock_::now(); }

    /**
     * Returns the time since construction or the last reset(), in seconds,
     * as a double (fractions of a second are preserved).
     */
    double elapsed() const
    {
        // Converting an integer-tick duration to a floating-point duration is
        // implicit and lossless in <chrono>; no duration_cast is required.
        const second_ delta = clock_::now() - beg_;
        return delta.count();
    }

private:
    using clock_  = std::chrono::steady_clock;                      // monotonic time source
    using second_ = std::chrono::duration<double, std::ratio<1> >;  // seconds stored as double

    std::chrono::time_point<clock_> beg_;  // instant the current measurement began
};
/**
 * Functor mapping an index to a pseudo-random float.
 *
 * Each invocation builds a fresh, default-constructed
 * thrust::default_random_engine, skips the first `n` values of its sequence,
 * and returns the next draw from a uniform real distribution constructed
 * with the bounds (a, b). Because the engine is rebuilt on every call, the
 * result depends only on `n`, `a` and `b`.
 *
 * Callable from both host and device code.
 */
struct prg
{
    float a, b;  // bounds handed to the uniform distribution

    /** Stores the distribution bounds; the defaults are 0 and 1. */
    __host__ __device__
    prg(float _a = 0.f, float _b = 1.f)
        : a(_a), b(_b)
    {
    }

    /** Returns the draw that follows the first `n` discarded engine outputs. */
    __host__ __device__
    float operator()(const unsigned int n) const
    {
        thrust::default_random_engine engine;
        thrust::uniform_real_distribution<float> uniform(a, b);

        // Skip ahead so that distinct indices yield distinct positions in the
        // engine's sequence.
        engine.discard(n);

        const float sample = uniform(engine);
        return sample;
    }
};
/**
 * Times thrust::reduce_by_key and the cudaDeviceSynchronize() that follows it.
 *
 * Steps:
 *   1. Fill a device vector of N floats using the prg functor applied to the
 *      indices 0..N-1.
 *   2. Wait for the device to go idle so the fill is not part of the timing.
 *   3. Call thrust::reduce_by_key with a constant key (0) for every element
 *      and record how long the call takes to return to the host.
 *   4. Call cudaDeviceSynchronize() and record how much additional time it
 *      takes. A value near zero indicates the reduction had already finished
 *      when the call returned.
 *
 * Both durations are printed to stdout in milliseconds.
 *
 * Returns 0 on success and 1 if any checked CUDA runtime call reports an
 * error (the error string is written to stderr).
 */
int main(void)
{
    const int N = 1 << 20;
    int status = 0;

    // Reports a failed CUDA runtime call on stderr; returns true on success.
    auto succeeded = [](const char* what, cudaError_t err) -> bool {
        if (err == cudaSuccess) {
            return true;
        }
        std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
        return false;
    };

    {
        // Inner scope: every device_vector must be destroyed before
        // cudaDeviceReset() is called below.
        thrust::device_vector<float> numbers(N);
        thrust::device_vector<float> results(N);
        thrust::counting_iterator<unsigned int> index_sequence_begin(0);
        thrust::constant_iterator<unsigned int> keys_in(0);
        thrust::device_vector<unsigned int> keys_out(N);

        thrust::transform(index_sequence_begin,
                          index_sequence_begin + N,
                          numbers.begin(),
                          prg(1.f, 2.f));

        // Wait for any work still in flight from the fill above so that it
        // cannot be charged to the reduction timing. Whether
        // thrust::transform itself blocks the host depends on the Thrust
        // version and backend, so do not rely on it.
        if (succeeded("cudaDeviceSynchronize (before timing)", cudaDeviceSynchronize())) {
            Timer timer;  // starts counting at construction

            thrust::reduce_by_key(keys_in, keys_in + N,
                                  numbers.begin(),
                                  keys_out.begin(),
                                  results.begin());
            const double t_call = timer.elapsed();

            const cudaError_t sync_err = cudaDeviceSynchronize();
            const double t_total = timer.elapsed();

            if (succeeded("cudaDeviceSynchronize (after reduce_by_key)", sync_err)) {
                std::cout << "thrust::reduce_by_key took " << 1e3*t_call << "ms" << std::endl;
                std::cout << "cudaDeviceSynchronize took " << 1e3*(t_total - t_call) << "ms" << std::endl;
            } else {
                status = 1;
            }
        } else {
            status = 1;
        }
    }

    if (!succeeded("cudaDeviceReset", cudaDeviceReset())) {
        status = 1;
    }
    return status;
}
- $ nvcc -std=c++11 -arch=sm_52 -o thrust_async thrust_async.cu
- $ ./thrust_async
- thrust::reduce_by_key took 1.64952ms
- cudaDeviceSynchronize took 0.004666ms
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement