Guest User

Untitled

a guest
Nov 15th, 2021
193
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.95 KB | None | 0 0
  1. #include <cuda_runtime.h>
  2. #include <string>
  3. #include <iostream>
  4. #include <unistd.h>
  5. #include <cstring>
  6. #include <chrono>
  7.  
  // Prints the command-line usage of the allocation benchmark to stderr.
  //
  // Lists the four positional arguments expected on the command line. The
  // "init" line deliberately does not promise zero-initialization: the fill
  // value the program writes into each chunk is a non-zero byte.
  void printHelp() {
      std::cerr << "./alloc_cuda type init total_amount_mb piece_mb" << std::endl;
      std::cerr << "  type - host, device, unified, pinned\n";
      std::cerr << "  init - 1/0, if should fill each chunk after alloc (non-zero byte pattern)\n";
      std::cerr << "  total_amount_mb - number of megabytes to alloc\n";
      std::cerr << "  piece_mb - size of one alloc chunk\n";
  }
  15.  
  // Reports a failed CUDA runtime call on stderr; does not abort the program.
  //
  // x must be an expression of type cudaError_t; it is evaluated exactly once.
  // On failure the message names the failing expression, the source location
  // and the runtime's description of the error code.
  //
  // The do { ... } while (0) wrapper makes the macro behave as one statement,
  // so `CUDA_CHECK(call);` is safe inside an unbraced if/else.
  #define CUDA_CHECK(x) \
      do { \
          const cudaError_t cuda_check_status_ = (x); \
          if (cuda_check_status_ != cudaSuccess) { \
              std::cerr << "Cuda call failed: " #x " at " << __FILE__ << ":" << __LINE__ \
                        << ": " << cudaGetErrorString(cuda_check_status_) << std::endl; \
          } \
      } while (0)
  20.  
  21. int main(int argc, char **argv) {
  22.     if (argc != 5) {
  23.         printHelp();
  24.         return 0;
  25.     }
  26.  
  27.     std::string type = argv[1];
  28.     bool init = std::atoi(argv[2]) == 1;
  29.     size_t size = (size_t)std::atoi(argv[3]) * 1024 * 1024;
  30.     size_t piece_size = (size_t)std::atoi(argv[4]) * 1024 * 1024;
  31.     void *ptr = nullptr;
  32.  
  33.     std::cerr << "Allocating total " << size << " bytes with " << piece_size << " bytes each chunk" << std::endl;
  34.  
  35.     std::chrono::steady_clock::duration total_alloc_duration{0};
  36.     std::chrono::steady_clock::duration total_init_duration{0};
  37.  
  38.     size_t done = 0;
  39.     while (done < size) {
  40.         if (done + piece_size > size) {
  41.             piece_size = size - done;
  42.         }
  43.  
  44.         auto start = std::chrono::steady_clock::now();
  45.         if (type == "host") {
  46.             ptr = malloc(piece_size);
  47.         } else if (type == "device") {
  48.             CUDA_CHECK(cudaMalloc(&ptr, piece_size));
  49.         } else if (type == "unified") {
  50.             CUDA_CHECK(cudaMallocManaged(&ptr, piece_size));
  51.         } else if (type == "pinned") {
  52.             CUDA_CHECK(cudaHostAlloc(&ptr, piece_size, cudaHostAllocMapped));
  53.         } else {
  54.             printHelp();
  55.             return 0;
  56.         }
  57.  
  58.         total_alloc_duration += (std::chrono::steady_clock::now() - start);
  59.         done += piece_size;
  60.  
  61.         start = std::chrono::steady_clock::now();
  62.         if (init) {
  63.             if (type == "host" || type == "pinned") {
  64.                 memset(ptr, -999999, piece_size);
  65.             }
  66.             if (type == "device" || type == "unified") {
  67.                 CUDA_CHECK(cudaMemset(ptr, -999999, piece_size));
  68.             }
  69.             if (type == "pinned") {
  70.                 void *devPtr;
  71.                 CUDA_CHECK(cudaHostGetDevicePointer(&devPtr, ptr, 0));
  72.                 CUDA_CHECK(cudaMemset(devPtr, -999999, piece_size));
  73.             }
  74.         }
  75.         total_init_duration += (std::chrono::steady_clock::now() - start);
  76.     }
  77.  
  78.     std::cerr << "Total allocations time: " << std::chrono::duration_cast<std::chrono::milliseconds>(total_alloc_duration).count() << " ms" << std::endl;
  79.     std::cerr << "Total init time: " << std::chrono::duration_cast<std::chrono::milliseconds>(total_init_duration).count() << " ms" << std::endl;
  80.  
  81.     std::cerr << "Done." << std::endl;
  82.  
  83.     sleep(999999);
  84.  
  85.     return 0;
  86. }
Advertisement
Add Comment
Please, Sign In to add comment