Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- Test blocking modes.
- https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g69e73c7dda3fc05306ae7c811a690fac
- Linux
- $ ./test_cpu_usage 25000000 128 default
- Wall clock time 0.978 seconds, cpu time 0.977 seconds
- $ ./test_cpu_usage 25000000 128 spin
- Wall clock time 0.995 seconds, cpu time 0.995 seconds
- $ ./test_cpu_usage 25000000 128 yield
- Wall clock time 0.998 seconds, cpu time 0.997 seconds
- $ ./test_cpu_usage 25000000 128 block
- Wall clock time 1.001 seconds, cpu time 0.000 seconds
- Windows
- $ ./test_cpu_usage 25000000 128 default
- Wall clock time 0.974 seconds, cpu time 0.984 seconds
- $ ./test_cpu_usage 25000000 128 spin
- Wall clock time 0.993 seconds, cpu time 1.000 seconds
- $ ./test_cpu_usage 25000000 128 yield
- Wall clock time 0.969 seconds, cpu time 0.297 seconds
- $ ./test_cpu_usage 25000000 128 block
- Wall clock time 0.972 seconds, cpu time 0.000 seconds
- */
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <time.h>
#include <cuda.h>
// A macro for checking the error codes of cuda runtime calls.
//
// Evaluates `expr` exactly once. If the result is not cudaSuccess, the CUDA
// error string, the source location and the failing expression are written to
// stderr and the process exits with status 1.
//
// The body is wrapped in do { ... } while (0) so the macro behaves like a
// single statement (safe inside an unbraced if/else), and the local uses a
// reserved-looking name so it cannot shadow a caller variable named `err`.
#define CUDA_ERROR_CHECK(expr)                                             \
    do {                                                                   \
        const cudaError_t cuda_error_check_result_ = (expr);               \
        if (cuda_error_check_result_ != cudaSuccess) {                     \
            fprintf(stderr, "CUDA call failed!\n%s\n",                     \
                    cudaGetErrorString(cuda_error_check_result_));         \
            fprintf(stderr, "  at %s:%d: %s\n", __FILE__, __LINE__, #expr); \
            exit(1);                                                       \
        }                                                                  \
    } while (0)
- #ifdef _WIN32
- #include <windows.h>
// Wall-clock timestamp in seconds, taken from the Windows performance counter.
// The counter frequency is queried on the first call and cached in a
// function-local static for later calls.
double getSeconds(void) {
    // Static storage is zero-initialised, so 0 means "not queried yet".
    static LARGE_INTEGER ticksPerSecond;
    if (!ticksPerSecond.QuadPart) {
        QueryPerformanceFrequency(&ticksPerSecond);
    }

    LARGE_INTEGER now;
    QueryPerformanceCounter(&now);

    const double frequency = static_cast<double>(ticksPerSecond.QuadPart);
    return now.QuadPart / frequency;
}
// CPU time consumed so far by the current process, in seconds.
//
// Sums kernel-mode and user-mode time reported by GetProcessTimes so that the
// value is comparable with the clock()-based implementation used on the
// non-Windows branch (counting user time alone hides time spent inside system
// calls made while waiting on the GPU).
//
// Returns 0.0 if GetProcessTimes fails, rather than reading uninitialised
// FILETIME values.
double getCPUSeconds() {
    FILETIME creation_time, exit_time, kernel_time, user_time;
    if (!GetProcessTimes(GetCurrentProcess(), &creation_time, &exit_time,
                         &kernel_time, &user_time)) {
        return 0.0;
    }

    // FILETIME is split into two 32-bit halves; recombine each into 64 bits.
    const uint64_t kernel_100_nanos =
        kernel_time.dwLowDateTime + ((uint64_t)kernel_time.dwHighDateTime << 32);
    const uint64_t user_100_nanos =
        user_time.dwLowDateTime + ((uint64_t)user_time.dwHighDateTime << 32);

    // Both values are counts of 100-nanosecond intervals.
    const uint64_t intervals_100_nanos = kernel_100_nanos + user_100_nanos;
    return intervals_100_nanos / 10000000.;
}
- #else
// Wall-clock timestamp in seconds, read from CLOCK_MONOTONIC.
// Whole seconds and the nanosecond remainder are combined into one double.
double getSeconds() {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const double whole_seconds = static_cast<double>(now.tv_sec);
    const double fraction = 1e-9 * static_cast<double>(now.tv_nsec);
    return whole_seconds + fraction;
}
// Processor time reported by clock(), converted from clock ticks to seconds.
double getCPUSeconds() {
    const clock_t ticks = clock();
    return static_cast<double>(ticks) / CLOCKS_PER_SEC;
}
- #endif
// Keeps the GPU busy for a duration proportional to `iters`.
//
// Each thread owns one element of `data`: it adds 3.0 once, then applies
// x = x * 2 - 3 to that element `iters` times. An element that starts at 0
// becomes 3.0 and stays there (3 * 2 - 3 == 3), so the values do not grow.
//
// Parameters:
//   iters - number of loop iterations per thread
//   size  - number of elements in `data`; threads whose index is >= size exit
//   data  - buffer of at least `size` doubles, updated in place
//
// Only the x dimension of the grid and block is used. The element index is
// the global thread index, so a multi-block launch gives every thread its own
// element instead of having all blocks race on the first blockDim.x elements.
// For a single-block launch the index equals threadIdx.x.
__global__ void busy_kernel(int iters, int size, double *data) {
    // 64-bit signed index: cannot overflow, and a negative `size` makes every
    // thread return instead of defeating the guard via unsigned conversion.
    const long long idx =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx >= size) return;

    data[idx] += 3.0;
    for (int i = 0; i < iters; i++) {
        data[idx] = data[idx] * 2. - 3.;
    }
}
// Parses `text` as a base-10 integer in the range [min_value, INT_MAX].
// Returns true and stores the value in *out on success; returns false when
// the text is empty, has trailing characters, or is out of range.
static bool parseIntArg(const char *text, int min_value, int *out) {
    char *end = NULL;
    const long parsed = strtol(text, &end, 10);
    if (end == text || *end != '\0') return false;
    if (parsed < min_value || parsed > INT_MAX) return false;
    *out = static_cast<int>(parsed);
    return true;
}

// Measures wall-clock and CPU time spent by the host while it waits for one
// busy_kernel launch under the selected device scheduling mode.
//
// Usage: test_cpu_usage <iters> <GPU thread count> default|spin|yield|block
//
// Returns 0 on success and 1 on bad arguments. Any failing CUDA call
// terminates the process through CUDA_ERROR_CHECK.
int main(int argc, char **argv) {
    if (argc != 4) {
        printf("\n test_cpu_usage <iters> <GPU thread count> default|spin|yield|block\n\n");
        return 1;
    }

    // Reject malformed numbers instead of silently treating them as 0.
    int iters = 0;
    int size = 0;
    if (!parseIntArg(argv[1], 0, &iters)) {
        printf("Invalid iteration count: %s\n", argv[1]);
        return 1;
    }
    // At least one thread is needed: the kernel is launched with `size`
    // threads in a single block.
    if (!parseIntArg(argv[2], 1, &size)) {
        printf("Invalid GPU thread count: %s\n", argv[2]);
        return 1;
    }

    unsigned int sync_flag = cudaDeviceScheduleAuto;
    if (!strcmp(argv[3], "spin")) {
        sync_flag = cudaDeviceScheduleSpin;
    } else if (!strcmp(argv[3], "yield")) {
        sync_flag = cudaDeviceScheduleYield;
    } else if (!strcmp(argv[3], "block")) {
        sync_flag = cudaDeviceScheduleBlockingSync;
    } else if (strcmp(argv[3], "default")) {
        printf("Invalid sync mode: %s\n", argv[3]);
        return 1;
    }
    // Set the scheduling mode before any other CUDA call touches the device.
    CUDA_ERROR_CHECK(cudaSetDeviceFlags(sync_flag));

    // Zero the device buffer directly; all-zero bytes represent 0.0, so no
    // host-side staging buffer is required.
    const size_t bytes = sizeof(double) * static_cast<size_t>(size);
    double *data_dev = NULL;
    CUDA_ERROR_CHECK(cudaMalloc((void**) &data_dev, bytes));
    CUDA_ERROR_CHECK(cudaMemset(data_dev, 0, bytes));
    // Finish the setup work before the timed region starts.
    CUDA_ERROR_CHECK(cudaDeviceSynchronize());

    double wall_time = getSeconds();
    double cpu_time = getCPUSeconds();
    busy_kernel<<<1, size>>>(iters, size, data_dev);
    // A bad launch configuration (e.g. too many threads per block) is only
    // reported through cudaGetLastError, not by the launch statement itself.
    CUDA_ERROR_CHECK(cudaGetLastError());
    CUDA_ERROR_CHECK(cudaDeviceSynchronize());
    wall_time = getSeconds() - wall_time;
    cpu_time = getCPUSeconds() - cpu_time;

    printf("Wall clock time %.3f seconds, cpu time %.3f seconds\n", wall_time, cpu_time);

    CUDA_ERROR_CHECK(cudaFree(data_dev));
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement