Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include <cuda.h>
#include <cuda_runtime.h>
/*
 * Returns the current wall-clock time in seconds as a double, built from
 * the seconds and microseconds fields reported by gettimeofday().
 * Intended for measuring intervals: take the difference of two calls.
 */
double wtime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_seconds = (double)now.tv_sec;
    const double microseconds = (double)now.tv_usec;

    // Microseconds are scaled to a fraction of a second before adding.
    return whole_seconds + microseconds * 1E-6;
}
/*
 * Element-wise kernel: a_device[i] = (b_device[i] + c_device[i])^2.
 *
 * Each thread processes the single element at its flat index
 * threadIdx.x + blockDim.x * blockIdx.x (only the x dimension is used).
 *
 * Precondition: there is no bounds check, so the launch must not provide
 * more threads (gridDim.x * blockDim.x) than the arrays have elements.
 */
__global__ void add(float *a_device, float *b_device, float *c_device)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Hold the sum in a local and store the square once.
    const float sum = b_device[idx] + c_device[idx];
    a_device[idx] = sum * sum;
}
/* Reports which CUDA runtime call failed on stderr and terminates the program. */
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Compares the wall-clock cost of host<->device transfers done two ways:
 *   "def"    - cudaMemcpy from/to ordinary calloc'ed host buffers
 *   "paging" - cudaMemcpyAsync from/to buffers obtained with cudaHostAlloc
 * for both directions, running the add kernel in between, and prints the
 * per-direction and total timings.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any allocation or CUDA
 * call fails.
 */
int main()
{
    const int threads = 32;
    const int num_of_blocks = 386;
    // The add kernel has no bounds check, so the element count is derived
    // from the launch configuration to guarantee an exact fit.
    const int N = threads * num_of_blocks;
    const size_t bytes = (size_t)N * sizeof(float);

    float *a = (float *)calloc(N, sizeof(*a));
    float *b = (float *)calloc(N, sizeof(*b));
    float *c = (float *)calloc(N, sizeof(*c));
    if (a == NULL || b == NULL || c == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    float *a_device = NULL;
    float *b_device = NULL;
    float *c_device = NULL;
    float *a_async = NULL;
    float *b_async = NULL;
    float *c_async = NULL;

    check_cuda(cudaMalloc((void **)&a_device, bytes), "cudaMalloc(a_device)");
    check_cuda(cudaMalloc((void **)&b_device, bytes), "cudaMalloc(b_device)");
    check_cuda(cudaMalloc((void **)&c_device, bytes), "cudaMalloc(c_device)");
    check_cuda(cudaHostAlloc((void **)&a_async, bytes, cudaHostAllocDefault), "cudaHostAlloc(a_async)");
    check_cuda(cudaHostAlloc((void **)&b_async, bytes, cudaHostAllocDefault), "cudaHostAlloc(b_async)");
    check_cuda(cudaHostAlloc((void **)&c_async, bytes, cudaHostAllocDefault), "cudaHostAlloc(c_async)");

    // Fill both sets of input buffers with the same data. The buffers from
    // cudaHostAlloc are the last ones copied to the device before the kernel
    // runs, so leaving them unset would feed the kernel uninitialized values.
    for (int i = 0; i < N; i++)
    {
        b[i] = (float)i;
        c[i] = (float)i;
        b_async[i] = (float)i;
        c_async[i] = (float)i;
    }

    // Host -> device, ordinary host memory. Synchronize before stopping the
    // clock so the measurement covers the whole transfer.
    double cpyDef = -wtime();
    check_cuda(cudaMemcpy(b_device, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(b -> b_device)");
    check_cuda(cudaMemcpy(c_device, c, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(c -> c_device)");
    check_cuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize after cudaMemcpy H2D");
    cpyDef += wtime();

    // Host -> device, cudaHostAlloc buffers. cudaMemcpyAsync only enqueues the
    // copy, so the clock must not be stopped until the device has finished.
    double cpyAsync = -wtime();
    check_cuda(cudaMemcpyAsync(b_device, b_async, bytes, cudaMemcpyHostToDevice), "cudaMemcpyAsync(b_async -> b_device)");
    check_cuda(cudaMemcpyAsync(c_device, c_async, bytes, cudaMemcpyHostToDevice), "cudaMemcpyAsync(c_async -> c_device)");
    check_cuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize after cudaMemcpyAsync H2D");
    cpyAsync += wtime();

    printf("host->device: \n" "def: %lf\n" "paging: %lf\n\n", cpyDef, cpyAsync);

    add <<< num_of_blocks, threads >>> (a_device, b_device, c_device);
    // A bad launch configuration is only reported through cudaGetLastError;
    // faults during execution surface at the following synchronization.
    check_cuda(cudaGetLastError(), "add kernel launch");
    check_cuda(cudaDeviceSynchronize(), "add kernel execution");

    // Device -> host, ordinary host memory.
    double backDef = -wtime();
    check_cuda(cudaMemcpy(a, a_device, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(a_device -> a)");
    backDef += wtime();

    // Device -> host, cudaHostAlloc buffer. Wait for completion so the timing
    // is meaningful and a_async is fully written before it is released.
    double backAsync = -wtime();
    check_cuda(cudaMemcpyAsync(a_async, a_device, bytes, cudaMemcpyDeviceToHost), "cudaMemcpyAsync(a_device -> a_async)");
    check_cuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize after cudaMemcpyAsync D2H");
    backAsync += wtime();

    printf("device->host:\n" "def: %lf\n" "paging: %lf\n\n", backDef, backAsync);
    printf("summary:\n" "def: %lf\n" "paging: %lf\n", cpyDef + backDef, cpyAsync + backAsync);

    // Release everything that was allocated above.
    check_cuda(cudaFree(a_device), "cudaFree(a_device)");
    check_cuda(cudaFree(b_device), "cudaFree(b_device)");
    check_cuda(cudaFree(c_device), "cudaFree(c_device)");
    check_cuda(cudaFreeHost(a_async), "cudaFreeHost(a_async)");
    check_cuda(cudaFreeHost(b_async), "cudaFreeHost(b_async)");
    check_cuda(cudaFreeHost(c_async), "cudaFreeHost(c_async)");
    free(a);
    free(b);
    free(c);

    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement