Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <cuda_runtime.h>
#include <unistd.h>

#include <cerrno>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <string>
// Writes the command-line usage summary to stderr.
//
// The text describes the four positional arguments the program expects.
// The init line deliberately does not promise zero-filling: the fill that
// main() performs uses a non-zero byte, it only has to touch the memory.
void printHelp() {
    std::cerr << "./alloc_cuda type init total_amount_mb piece_mb\n"
              << " type - host, device, unified, pinned\n"
              << " init - 1/0, if each chunk should be filled (memset) after it is allocated\n"
              << " total_amount_mb - number of megabytes to alloc\n"
              << " piece_mb - size of one alloc chunk in megabytes (must be > 0)\n";
    // Single flush at the end instead of one per line.
    std::cerr.flush();
}
// Evaluates a CUDA runtime call exactly once and, when the result is not
// cudaSuccess, reports the failing expression, its source location and the
// runtime's description of the error on stderr.
//
// The check is non-fatal: execution continues after the report, so a caller
// that cannot proceed after a failure has to test the outcome itself.
//
// The do/while(0) wrapper makes the macro expand to a single statement, so it
// is safe inside an unbraced if/else and always requires a trailing ';'.
#define CUDA_CHECK(x)                                                          \
    do {                                                                       \
        const cudaError_t cuda_check_status_ = (x);                            \
        if (cuda_check_status_ != cudaSuccess) {                               \
            std::cerr << "Cuda call failed: " << #x << " at " << __FILE__      \
                      << ":" << __LINE__ << ": "                               \
                      << cudaGetErrorString(cuda_check_status_) << std::endl;  \
        }                                                                      \
    } while (0)
- int main(int argc, char **argv) {
- if (argc != 5) {
- printHelp();
- return 0;
- }
- std::string type = argv[1];
- bool init = std::atoi(argv[2]) == 1;
- size_t size = (size_t)std::atoi(argv[3]) * 1024 * 1024;
- size_t piece_size = (size_t)std::atoi(argv[4]) * 1024 * 1024;
- void *ptr = nullptr;
- std::cerr << "Allocating total " << size << " bytes with " << piece_size << " bytes each chunk" << std::endl;
- std::chrono::steady_clock::duration total_alloc_duration{0};
- std::chrono::steady_clock::duration total_init_duration{0};
- size_t done = 0;
- while (done < size) {
- if (done + piece_size > size) {
- piece_size = size - done;
- }
- auto start = std::chrono::steady_clock::now();
- if (type == "host") {
- ptr = malloc(piece_size);
- } else if (type == "device") {
- CUDA_CHECK(cudaMalloc(&ptr, piece_size));
- } else if (type == "unified") {
- CUDA_CHECK(cudaMallocManaged(&ptr, piece_size));
- } else if (type == "pinned") {
- CUDA_CHECK(cudaHostAlloc(&ptr, piece_size, cudaHostAllocMapped));
- } else {
- printHelp();
- return 0;
- }
- total_alloc_duration += (std::chrono::steady_clock::now() - start);
- done += piece_size;
- start = std::chrono::steady_clock::now();
- if (init) {
- if (type == "host" || type == "pinned") {
- memset(ptr, -999999, piece_size);
- }
- if (type == "device" || type == "unified") {
- CUDA_CHECK(cudaMemset(ptr, -999999, piece_size));
- }
- if (type == "pinned") {
- void *devPtr;
- CUDA_CHECK(cudaHostGetDevicePointer(&devPtr, ptr, 0));
- CUDA_CHECK(cudaMemset(devPtr, -999999, piece_size));
- }
- }
- total_init_duration += (std::chrono::steady_clock::now() - start);
- }
- std::cerr << "Total allocations time: " << std::chrono::duration_cast<std::chrono::milliseconds>(total_alloc_duration).count() << " ms" << std::endl;
- std::cerr << "Total init time: " << std::chrono::duration_cast<std::chrono::milliseconds>(total_init_duration).count() << " ms" << std::endl;
- std::cerr << "Done." << std::endl;
- sleep(999999);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment