Advertisement
Guest User

gpu-cpu-example.cpp

a guest
Jun 10th, 2023
155
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 7.85 KB | None | 0 0
#include <algorithm>
#include <atomic>
#include <cerrno>
#include <chrono>
#include <climits>
#include <cstdlib>
#include <deque>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include <boost/compute/core.hpp>
#include <boost/compute/types/struct.hpp>
#include <boost/compute/algorithm/transform.hpp>
#include <boost/compute/container/vector.hpp>
  13.  
  14.  
  15. namespace compute = boost::compute;
  16.  
  17. // printing of the device's info:
  18. std::ostream &operator<<(std::ostream &os, const compute::device & device)
  19. {
  20.     return os << device.name() << " (platform: " << device.platform().name() << ")";
  21. }
  22.  
// Maze state: one unsigned value per cell of a 21 x 31 grid, stored row by row
// (cell (r, c) lives at index r * 31 + c, see pos2int).
// setField() fills every cell with WALL or EMPTY; the bugMoving* functions then
// increment the value of each cell the bug steps on, so a non-wall cell holds
// its number of visits.
struct Field
{
    unsigned nVis[21 * 31]; // per-cell visit counter, or WALL for a wall cell
};
  28.  
  29. // set up the field:
  30. #define pos2int(r, c) ((r) * 31 + (c))
  31. #define WALL (UINT_MAX)
  32. #define EMPTY (0)
  33. Field setField(const std::vector<std::vector<bool>> & f)
  34. {
  35.     Field a;
  36.     for (int r = 0; r < 21; r++)
  37.         for (int c = 0; c < 31; c++)
  38.             a.nVis[pos2int(r,c)] = (f[r][c] ? WALL : EMPTY);
  39.     return a;
  40. }
  41.  
  42. // accumulate the answer for calculated maze (sum of number of visits for each cell)
  43. long long accumulateTheAnswer(const Field &f)
  44. {
  45.     long long res = 0;
  46.     for (int i = 0; i < 21 * 31; i++)
  47.         if (f.nVis[i] != WALL)
  48.             res += f.nVis[i];
  49.     return res;
  50. }
  51.  
  52. // implement bug's moving on CPU:
  53. __attribute__((noinline))
  54. void bugMovingCPU(Field &a)
  55. {
  56.     int pos = 1 * 31 + 1, dir = +31;
  57.     while (pos != 618)
  58.     {
  59.         ++a.nVis[pos];
  60.         unsigned val = a.nVis[pos + dir];
  61.         { unsigned next = a.nVis[pos + 31]; if (next < val) dir = +31, val = next; }
  62.         { unsigned next = a.nVis[pos +  1]; if (next < val) dir =  +1, val = next; }
  63.         { unsigned next = a.nVis[pos - 31]; if (next < val) dir = -31, val = next; }
  64.         { unsigned next = a.nVis[pos -  1]; if (next < val) dir =  -1, val = next; }
  65.         pos += dir;
  66.     }
  67. }
  68.  
// Register the Field struct (and its nVis member) with Boost.Compute under the
// device-side name "Field", so that Field values can be stored in
// compute::vector and passed to device functions:
BOOST_COMPUTE_ADAPT_STRUCT(Field, Field, (nVis))
  71.  
// Device-side version of the bug's walk, applied to each maze by compute::transform.
// It takes a Field by value and returns the updated copy.
// The body is the same algorithm as bugMovingCPU (start at index 32, stop at
// index 618, always step to the least-visited neighbour); keep the two in sync.
// NOTE(review): the body is handed to the BOOST_COMPUTE_FUNCTION macro as device
// source, so it presumably has to remain valid OpenCL C - confirm before
// using any C++-only construct in it.
BOOST_COMPUTE_FUNCTION(Field, bugMovingGPU, (Field a),
{
    int pos = 1 * 31 + 1, dir = +31;
    while (pos != 618)
    {
        ++a.nVis[pos];
        unsigned val = a.nVis[pos + dir];
        { unsigned next = a.nVis[pos + 31]; if (next < val) dir = +31, val = next; }
        { unsigned next = a.nVis[pos +  1]; if (next < val) dir =  +1, val = next; }
        { unsigned next = a.nVis[pos - 31]; if (next < val) dir = -31, val = next; }
        { unsigned next = a.nVis[pos -  1]; if (next < val) dir =  -1, val = next; }
        pos += dir;
    }
    return a;
});
  88.  
  89. // current time in nanoseconds:
  90. long long currTimeInNanos()
  91. {
  92.     return std::chrono::high_resolution_clock::now().time_since_epoch().count();
  93. }
  94.  
  95. // function for printing numbers like 1234567890 as 1.234.567.890
  96. template<typename T> std::string printWithDots(T x)
  97. {
  98.     std::string s = std::to_string(x);
  99.     std::string t;
  100.     while (s.size())
  101.     {
  102.         for (int cnt = 3; cnt-- && s.size(); )
  103.         {
  104.             t += s.back();
  105.             s.pop_back();
  106.         }
  107.         if (s.size())
  108.             t += '.';
  109.     }
  110.     std::reverse(t.begin(), t.end());
  111.     return t;
  112. }
  113.  
  114. int main(int argc, const char *argv[])
  115. {
  116.     if (argc != 3)
  117.     {
  118.         std::cout << "Usage: ./main <num copies> <input file>" << std::endl;
  119.         return 0;
  120.     }
  121.     const int nFields = std::atoi(argv[1]);
  122.     std::cout << "   nFields: " << nFields << std::endl;
  123.     const char *inputFileName = argv[2];
  124.     std::cout << "input file: '" << inputFileName << "'" << std::endl;
  125.     // reading of the maze from the input file:
  126.     std::ifstream fin(inputFileName);
  127.     if (!fin)
  128.     {
  129.         std::cout << "Can't open the file '" << inputFileName << "'" << std::endl;
  130.         return 0;
  131.     }
  132.     std::vector<std::vector<bool>> f(21, std::vector<bool>(31, 1));
  133.     for (int i = 0; i < 21; i++)
  134.         for (int j = 0; j < 31; j++)
  135.         {
  136.             char ch; fin >> ch;
  137.             f[i][j] = (ch == '#');
  138.         }
  139.    
  140.     // cycle over all of the devices:
  141.     std::cout << "\nList of available devices:\n";
  142.     for (int i = 0; auto device : compute::system::devices())
  143.         std::cout << std::setw(4) << i++ << ":\t" << device << std::endl;
  144.    
  145.     // start CPU jobs by creating vector of threads, each of them will do some work independently
  146.     const int nCPUThreads = std::thread::hardware_concurrency();
  147.     std::atomic<bool> gpuFinished{false};
  148.     std::vector<std::thread> threads;
  149.     std::vector<std::deque<Field>> cpuResults(nCPUThreads);
  150.     for (int id = 0; id < nCPUThreads; id++)
  151.         threads.emplace_back([threadId = id, &gpuFinished, &f, &cpuResults]()
  152.         {
  153.             while (!gpuFinished)
  154.             {
  155.                 // requesting new field:
  156.                 Field currField = setField(f);
  157.                 // calculating new field:
  158.                 bugMovingCPU(currField);
  159.                 // adding this field in a vector of results:
  160.                 cpuResults[threadId].push_back(currField);
  161.             }
  162.         });
  163.        
  164.     std::cout << "\nNumber of CPU threads: " << nCPUThreads << std::endl;
  165.    
  166.     // get the default device
  167.     compute::device device = compute::system::default_device();
  168.     std::cout << "\nDefault device will be used:\n\t" << device << std::endl;
  169.     compute::context context(device);
  170.     compute::command_queue queue(context, device);    
  171.    
  172.     // start the measuring of runtime:
  173.     long long start = currTimeInNanos();
  174.    
  175.     // create vector of `nFields` copies of input field:
  176.     std::vector<Field> host_vector(nFields, setField(f));
  177.    
  178.     // create a vector on the device
  179.     compute::vector<Field> device_vector(host_vector.size(), context);
  180.    
  181.     // transfer data from the host to the device
  182.     compute::copy(host_vector.begin(), host_vector.end(), device_vector.begin(), queue);
  183.    
  184.     // calculate the answer for each field on GPU:
  185.     compute::transform(
  186.         device_vector.begin(), device_vector.end(), // input range
  187.         device_vector.begin(), // begin of output range
  188.         bugMovingGPU, queue // the lambda function which will be applied to each maze, and device's queue
  189.     );
  190.    
  191.     // copy calculated mazes back to the host
  192.     compute::copy(device_vector.begin(), device_vector.end(), host_vector.begin(), queue);
  193.    
  194.     // GPU has been finished here:
  195.     gpuFinished = true;
  196.    
  197.     // waiting while threads are not finished:
  198.     for (auto &t : threads)
  199.         if (t.joinable())
  200.             t.join();
  201.    
  202.     // finish the measuring of runtime:
  203.     long long finish = currTimeInNanos();
  204.     double runtime = (finish - start) * 1e-9;
  205.    
  206.     // calculating number of completed ops on GPU:
  207.     long long totalGPU = 0;
  208.     for (const auto &it : host_vector)
  209.         totalGPU += accumulateTheAnswer(it);
  210.    
  211.     // calculating number of completed ops on CPU:
  212.     long long totalCPU = 0;
  213.     for (const auto &results : cpuResults)
  214.         for (const auto &it : results)
  215.             totalCPU += accumulateTheAnswer(it);
  216.    
  217.     // the speed of GPU and CPU:
  218.     long long speedGPU = totalGPU / runtime;
  219.     long long speedCPU = totalCPU / runtime;
  220.    
  221.     // print the statistics:
  222.     std::cout << "\nCompleted during " << runtime << " seconds:\n";
  223.     std::cout << "  GPU ops: " << std::setw(18) << printWithDots(totalGPU) << std::endl;
  224.     std::cout << "  CPU ops: " << std::setw(18) << printWithDots(totalCPU) << std::endl;
  225.     std::cout << "  Sum ops: " << std::setw(18) << printWithDots(totalCPU+totalGPU) << std::endl;
  226.     std::cout << "\nProductivity per 1 second:\n";
  227.     std::cout << "GPU Speed: " << std::setw(18) << printWithDots(speedGPU) << "/s" << std::endl;
  228.     std::cout << "CPU Speed: " << std::setw(18) << printWithDots(speedCPU) << "/s" << std::endl;
  229.     std::cout << "Sum Speed: " << std::setw(18) << printWithDots(speedGPU+speedCPU) << "/s" << std::endl;
  230.     return 0;
  231. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement