Advertisement
Guest User

Untitled

a guest
Feb 17th, 2011
246
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.66 KB | None | 0 0
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include <boost/thread/condition.hpp>
#include <boost/thread/locks.hpp>
#include <boost/thread/mutex.hpp>
#include <boost/timer.hpp>
  10.  
  11.  
  12. boost::mutex print_mutex;
  13. void print(std::string s) {
  14.     boost::mutex::scoped_lock lock(print_mutex);
  15.     std::cout << s << std::endl << std::flush;
  16. }
  17.  
  18. template <typename T>
  19. class device_matrix {
  20.     public:
  21.         enum method_invocation {
  22.             NO_METHOD = 0,
  23.             SUM_METHOD = 1,
  24.             SQUARE_METHOD = 2,
  25.             NOOP_METHOD = 3
  26.         };
  27.  
  28.     private:
  29.         bool m_stop;
  30.         method_invocation m_method; // indicate which method to run
  31.         boost::thread m_thread;
  32.         boost::condition m_cond_workorder_ready; // condition is that work has been ordered, via m_method
  33.         boost::condition m_cond_workorder_finished; // condition that the last work order has been completed, and results can be utilized further
  34.         boost::mutex m_mutex;
  35.         int m_elements;
  36.         T* m_data;
  37.         T m_result;
  38.  
  39.     public:
  40.         typedef boost::mutex::scoped_lock scoped_lock;
  41.  
  42.         device_matrix(T* data, unsigned int elements) :
  43.             m_method(NO_METHOD), m_data(data),
  44.             m_elements(elements), m_result(-1), m_stop(false) {
  45.             m_thread = boost::thread(&device_matrix::thread_loop, this);
  46.         }
  47.  
  48.         ~device_matrix() {
  49.             {
  50.                 scoped_lock lock(m_mutex);
  51.                 // and a dummy method to get the thread to do nothing
  52.                 m_method = NOOP_METHOD;
  53.                 // we fake a work order to wake the thread up
  54.                 m_cond_workorder_ready.notify_one();
  55.                 m_stop = true;
  56.             } // drop the mutex, and then wait for the device thread to die off
  57.  
  58.             m_thread.join(); // main host waits for device thread to stop here.
  59.         }
  60.  
  61.         void sum() {
  62.             scoped_lock lock(m_mutex);
  63.             while (m_method != NO_METHOD) {
  64.                 m_cond_workorder_finished.wait(lock);
  65.             }
  66.             m_method = SUM_METHOD;
  67.             m_cond_workorder_ready.notify_one();
  68.         }
  69.  
  70.         void square() {
  71.             scoped_lock lock(m_mutex);
  72.             while (m_method != NO_METHOD) {
  73.                 m_cond_workorder_finished.wait(lock);
  74.             }
  75.             m_method = SQUARE_METHOD;
  76.             m_cond_workorder_ready.notify_one();
  77.         }
  78.  
  79.         T result() {
  80.             scoped_lock lock(m_mutex);
  81.             while (m_method != NO_METHOD) {
  82.                 m_cond_workorder_finished.wait(lock);
  83.             }
  84.             return m_result;
  85.         }
  86.  
  87.     private:
  88.  
  89.         void do_sum() {
  90.             float sum = 0.0;
  91.             float c = 0.0;
  92.             float y, t;
  93.             for (int i = 0; i < m_elements; i++) {
  94.                 y = m_data[i] - c;
  95.                 t = sum + y;
  96.                 c = (t - sum) - y;
  97.                 sum = t;
  98.             }
  99.             m_result = sum;
  100.         }
  101.  
  102.         void do_square() {
  103.             m_result = 2.0;
  104.         }
  105.  
  106.         void initialize_cuda() {
  107.             scoped_lock lock(m_mutex);
  108.             // this is where we can initialize cuda
  109.         }
  110.  
  111.         void thread_loop() {
  112.             initialize_cuda();
  113.  
  114.             while (1) {
  115.                 scoped_lock lock(m_mutex);
  116.                 while (m_method == NO_METHOD) m_cond_workorder_ready.wait(lock);
  117.                 switch (m_method) {
  118.                     case SUM_METHOD:
  119.                         do_sum();
  120.                         break;
  121.                     case SQUARE_METHOD:
  122.                         do_square();
  123.                         break;
  124.                     default:
  125.                     case NOOP_METHOD:
  126.                     case NO_METHOD:
  127.                         break;
  128.                 }
  129.                 m_method = NO_METHOD;
  130.                 m_cond_workorder_finished.notify_all(); // alert waiting threads, that we are done
  131.  
  132.                 if (m_stop) {
  133.                     return; // ends thread
  134.                 }
  135.             }
  136.         }
  137. };
  138.  
  139. float random_float(float add = 0.0) {
  140.     return ((float)rand()/(float)RAND_MAX) + add;
  141. }
  142.  
  143. int main() {
  144.     // these are powers of two, for easy splits for now
  145.     unsigned int elements = 2 << 27;
  146.     unsigned int number_of_splits = 8;
  147.     float *data = new float[elements];
  148.  
  149.     std::vector<device_matrix<float>*> matrices;
  150.  
  151.     boost::timer timer;
  152.     for (int i = 0; i < elements; i++) {
  153.         data[i] = random_float();
  154.     }
  155.     double time_to_init = timer.elapsed();
  156.  
  157.     // populate and calculate reference result
  158.     // uses kahan summation algorithm.
  159.     float sum = 0.0;
  160.     float c = 0.0;
  161.     float y, t;
  162.     timer.restart();
  163.     for (int i = 0; i < elements; i++) {
  164.         y = data[i] - c;
  165.         t = sum + y;
  166.         c = (t - sum) - y;
  167.         sum = t;
  168.     }
  169.  
  170.     double time_to_sum_single = timer.elapsed();
  171.  
  172.     for (int i = 0; i < number_of_splits; i ++) {
  173.         unsigned int data_size = elements/number_of_splits;
  174.         matrices.push_back(new device_matrix<float>(data+(i*data_size), data_size));
  175.     }
  176.  
  177.     std::cout << "starting computation" << std::endl;
  178.  
  179.     timer.restart();
  180.     // start up the matrices
  181.     for (int i = 0; i < number_of_splits; i ++) {
  182.         matrices[i]->sum();
  183.     }
  184.  
  185.     // add up results
  186.     float result = 0.0;
  187.     for (int i = 0; i < number_of_splits; i ++) {
  188.         result += matrices[i]->result();
  189.     }
  190.  
  191.     double time_to_sum_device = timer.elapsed();
  192.  
  193.     std::cout << "device_matrix count       " << matrices.size() << std::endl;
  194.     std::cout << "elements                  " << elements << std::endl;
  195.     std::cout << "UINT_MAX                  " << UINT_MAX << std::endl;
  196.     std::cout << "data size total           " << elements*sizeof(float)/1024/1024 << " mb" << std::endl;
  197.     std::cout << "size per device_matrix    " << elements*sizeof(float)/1024/1024/number_of_splits << " mb" << std::endl;
  198.  
  199.     std::cout.precision(5);
  200.     std::cout.setf(std::ios_base::fixed);
  201.     std::cout << "reference                 " << sum << std::endl;
  202.     std::cout << "result                    " << result << std::endl;
  203.     std::cout.precision(3);
  204.     std::cout << "time taken (init)         " << time_to_init << " secs" << std::endl;
  205.     std::cout << "time taken (single)       " << time_to_sum_single << " secs" << std::endl;
  206.     std::cout << "time taken (threaded)     " << time_to_sum_device << " secs" << std::endl;
  207.  
  208.     // dealloc shit
  209.     for (int i = 0; i < matrices.size(); i++) {
  210.         delete matrices[i];
  211.     }
  212.     delete[] data;
  213.  
  214.     return 0;
  215. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement