Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <opencv2/opencv.hpp>
- #include <chrono>
- #include <iostream>
- using std::chrono::high_resolution_clock;
- using std::chrono::duration_cast;
- using std::chrono::microseconds;
- // ============================================================================
- void variant_1(cv::Mat3b image, cv::Vec3f divisor)
- {
- image.forEach(
- [&divisor](cv::Vec3b& pixel, const int* po) -> void {
- for (uint8_t i(0); i < 3; ++i) {
- pixel[i] = cv::saturate_cast<uint8_t>(pixel[i] / divisor[i]);
- }
- }
- );
- }
- // ============================================================================
- class Variant2Impl : public cv::ParallelLoopBody
- {
- private:
- cv::Mat3b& image_;
- cv::Vec3f divisor_;
- public:
- Variant2Impl(cv::Mat3b& image, cv::Vec3f divisor)
- : image_(image), divisor_(divisor)
- {
- }
- virtual void operator()(const cv::Range& range) const
- {
- for (int32_t i(range.start); i < range.end; ++i) {
- for (cv::Vec3b& pixel : image_.row(i)) {
- for (uint8_t i(0); i < 3; ++i) {
- pixel[i] = cv::saturate_cast<uint8_t>(pixel[i] / divisor_[i]);
- }
- }
- }
- }
- };
- template<typename Impl>
- void variant_2(cv::Mat3b image, cv::Vec3f divisor)
- {
- cv::parallel_for_(cv::Range(0, image.rows), Impl(image, divisor));
- }
- // ============================================================================
// Timings of the individual iterations of one benchmark run.
using times_vector = std::vector<double>;
// One times_vector per tested thread count.
using thread_times_vector = std::vector<times_vector>;
// When defined, test_variant re-copies the source image before every
// iteration; otherwise it copies once per thread count.
#define IDENTICAL_INPUT
- template <typename Fn>
- thread_times_vector test_variant(Fn fn
- , cv::Mat3b source_image
- , cv::Vec3f divisor
- , int32_t max_threads
- , int32_t num_iterations)
- {
- thread_times_vector times;
- for (int32_t n(0); n < max_threads; ++n) {
- cv::setNumThreads(n + 1);
- std::cout << cv::getNumThreads() << " ";
- times.push_back(times_vector(num_iterations));
- cv::Mat3b work_image;
- #ifndef IDENTICAL_INPUT
- source_image.copyTo(work_image);
- #endif
- for (int32_t i(0); i < num_iterations; ++i) {
- #ifdef IDENTICAL_INPUT
- source_image.copyTo(work_image);
- #endif
- high_resolution_clock::time_point t1(high_resolution_clock::now());
- fn(work_image, divisor);
- high_resolution_clock::time_point t2(high_resolution_clock::now());
- times[n][i] = static_cast<double>(duration_cast<microseconds>(t2 - t1).count());
- }
- }
- return times;
- }
- // ============================================================================
- void print_results(std::string const& header, thread_times_vector const& times)
- {
- std::cout << "\n" << header << "\nthreads,mean(us),min(us),max(us)\n";
- for (int32_t i(0); i < times.size(); ++i) {
- double mean = cv::mean(times[i])[0];
- double min(0.0), max(0.0);
- cv::minMaxLoc(times[i], &min, &max);
- std::cout << (i + 1) << "," << mean << "," << min << "," << max << "\n";
- }
- std::cout << "\n";
- }
- // ============================================================================
- int main()
- {
- cv::Mat3b source_image(cv::Mat::zeros(1024, 1024, CV_8UC3));
- cv::randu(source_image, 0, 256);
- cv::Vec3f divisor(1.2f, 0.8f, 0.99f);
- int32_t MAX_THREADS(cv::getNumberOfCPUs());
- int32_t NUM_ITERATIONS(1000);
- thread_times_vector t1 = test_variant(&variant_1
- , source_image, divisor, MAX_THREADS, NUM_ITERATIONS);
- print_results("Variant 1", t1);
- thread_times_vector t2 = test_variant(&variant_2<Variant2Impl>
- , source_image, divisor, MAX_THREADS, NUM_ITERATIONS);
- print_results("Variant 2", t2);
- }
- // ============================================================================
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement