Advertisement
Guest User

Untitled

a guest
Aug 26th, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.72 KB | None | 0 0
  1. // Compiling and running this program:
  2. // nvcc -std=c++11 device-prop-test.cu && ./a.out
  3. #include <chrono>
  4. #include <iostream>
  5. using namespace std;
  6.  
  // Evaluates a CUDA runtime call and, when the result is not cudaSuccess,
  // prints the failing expression together with cudaGetErrorString() and makes
  // the *enclosing function* return -1. It can therefore only be used inside
  // functions whose return type accepts -1 (such as main).
  //
  // The argument is parenthesized and the temporary carries a macro-specific
  // name, so a checked expression that itself mentions a variable called
  // `status` is not silently captured by the macro's own local.
  #define CUDA_CHECK(call) \
    do { \
      cudaError_t cudaCheckStatus_ = (call); \
      if (cudaCheckStatus_ != cudaSuccess) { \
        printf("FAIL: call='%s'. Reason:%s\n", #call, \
               cudaGetErrorString(cudaCheckStatus_)); \
        return -1; \
      } \
    } while (0)
  16.  
  // Micro-benchmark comparing two ways of querying device information for the
  // current CUDA device:
  //   1. one cudaGetDeviceProperties() call, and
  //   2. a pair of cudaDeviceGetAttribute() calls (max shared memory per block
  //      and multiprocessor count).
  // Each variant is repeated kIterations times and the average wall-clock time
  // per iteration is printed in microseconds. Note that one iteration of the
  // second variant covers *both* attribute queries.
  //
  // Returns 0 on success, or -1 as soon as any CUDA call fails (via CUDA_CHECK).
  int main(int argc, char** argv) {
    // Command-line arguments are not used by this benchmark.
    (void)argc;
    (void)argv;

    // steady_clock is monotonic, which is what interval measurements need;
    // high_resolution_clock may alias a clock that can be adjusted.
    using Clock = std::chrono::steady_clock;
    // Floating-point microseconds keep the sub-microsecond part of the total
    // instead of truncating it before the average is computed.
    using Microseconds = std::chrono::duration<double, std::micro>;
    // Number of repetitions of each measured variant.
    constexpr int kIterations = 25;

    int devId;
    CUDA_CHECK(cudaGetDevice(&devId));

    // Variant 1: fetch the full property structure.
    auto start = Clock::now();
    cudaDeviceProp prop;
    for (int i = 0; i < kIterations; ++i) {
      CUDA_CHECK(cudaGetDeviceProperties(&prop, devId));
    }
    auto end = Clock::now();
    std::cout << "cudaGetDeviceProperties -> "
              << Microseconds(end - start).count() / kIterations
              << "us" << std::endl;

    // Variant 2: fetch only the two attributes of interest.
    int smemSize, numProcs;
    start = Clock::now();
    for (int i = 0; i < kIterations; ++i) {
      CUDA_CHECK(cudaDeviceGetAttribute(&smemSize,
                                        cudaDevAttrMaxSharedMemoryPerBlock,
                                        devId));
      CUDA_CHECK(cudaDeviceGetAttribute(&numProcs,
                                        cudaDevAttrMultiProcessorCount,
                                        devId));
    }
    end = Clock::now();
    std::cout << "cudaDeviceGetAttribute -> "
              << Microseconds(end - start).count() / kIterations
              << "us" << std::endl;
    return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement