Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include "stdio.h"
- #include "windows.h"
- #include <locale.h>
- #define SIZE 1024
/*
 * Element-wise integer vector addition: c[i] = a[i] + b[i] for i in [0, n).
 *
 * Launch layout: any 1-D grid of 1-D blocks. Every thread starts at its
 * global index and then advances by the total number of launched threads,
 * so all n elements are covered no matter how many blocks/threads are used.
 * A single-block launch such as <<<1, n>>> produces the same result as
 * before; multi-block launches no longer recompute only the first
 * blockDim.x elements.
 *
 * Parameters:
 *   a, b  device pointers to at least n readable ints
 *   c     device pointer to at least n writable ints
 *   n     number of elements to process; n <= 0 makes the kernel a no-op
 *
 * Uses no shared memory and performs no synchronization.
 */
__global__ void VectorAdd(int *a, int *b, int *c, int n) {
    // 64-bit index arithmetic so that neither blockIdx.x * blockDim.x nor
    // the stride increment can overflow for large grids or large n.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < n;
         i += stride) {
        c[i] = a[i] + b[i];
    }
}
/*
 * Reports a failed CUDA runtime call on stderr.
 *
 *   status  value returned by the CUDA runtime call
 *   what    short label of the call, used in the diagnostic
 *
 * Returns true when status is cudaSuccess, false otherwise.
 */
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Adds two SIZE-element integer vectors on the GPU, prints the first
 * (up to) 100 results and the elapsed time of the GPU part.
 *
 * Returns 0 on success, 1 if a host allocation or any CUDA call failed.
 * All host and device buffers are released on every path.
 */
int main() {
    setlocale(LC_ALL, "Russian");

    const size_t bytes = SIZE * sizeof(int);
    // Never print past the end of the result buffer if SIZE is lowered.
    const int shown = SIZE < 100 ? SIZE : 100;
    int exitCode = 1;
    int Delay1 = 0;

    int *a = (int *)malloc(bytes);
    int *b = (int *)malloc(bytes);
    int *c = (int *)malloc(bytes);
    int *d_a = NULL;
    int *d_b = NULL;
    int *d_c = NULL;

    // Single-pass block: any failure breaks out to the shared cleanup below.
    do {
        if (a == NULL || b == NULL || c == NULL) {
            fprintf(stderr, "Host allocation of %lu bytes failed\n",
                    (unsigned long)bytes);
            break;
        }

        if (!cudaOk(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)") ||
            !cudaOk(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)") ||
            !cudaOk(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)"))
            break;

        for (int i = 0; i < SIZE; ++i) {
            a[i] = i;
            b[i] = i;
            c[i] = 0;
        }

        // Timed region: host->device copies, kernel, device->host copy.
        // Host initialisation and console output are deliberately excluded
        // so the reported figure reflects the CUDA work only.
        // DWORD keeps the tick subtraction well-defined across wrap-around.
        // Note: GetTickCount has coarse resolution, so short runs may
        // legitimately report 0 ms.
        const DWORD Time1 = GetTickCount();

        if (!cudaOk(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice),
                    "cudaMemcpy(d_a <- a)") ||
            !cudaOk(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice),
                    "cudaMemcpy(d_b <- b)") ||
            !cudaOk(cudaMemcpy(d_c, c, bytes, cudaMemcpyHostToDevice),
                    "cudaMemcpy(d_c <- c)"))
            break;

        // One block of SIZE threads; an invalid configuration (e.g. SIZE
        // above the device's per-block thread limit) is reported by the
        // cudaGetLastError() check right after the launch.
        VectorAdd<<<1, SIZE>>>(d_a, d_b, d_c, SIZE);
        if (!cudaOk(cudaGetLastError(), "VectorAdd launch"))
            break;

        // Blocking copy: waits for the kernel to finish and surfaces any
        // error raised while it was executing.
        if (!cudaOk(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(c <- d_c)"))
            break;

        const DWORD Time2 = GetTickCount();
        Delay1 = (int)(Time2 - Time1);

        for (int i = 0; i < shown; ++i)
            printf("c[%d] = %d\n", i, c[i]);

        printf("\nВремя вычисления CUDA = %d ms\n", Delay1);
        exitCode = 0;
    } while (0);

    // free(NULL) and cudaFree(NULL) are no-ops, so this is safe after a
    // partial setup as well.
    free(a);
    free(b);
    free(c);
    cudaOk(cudaFree(d_a), "cudaFree(d_a)");
    cudaOk(cudaFree(d_b), "cudaFree(d_b)");
    cudaOk(cudaFree(d_c), "cudaFree(d_c)");

    system("pause");
    return exitCode;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement