Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <chrono>
#include <stdio.h>
// Host-side helper, defined after main, that adds the `size`-element host
// arrays a and b into c using the GPU. Returns the status of the first CUDA
// call that failed, or cudaSuccess.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);

// Element-wise vector add: each thread writes c[i] = a[i] + b[i] for its own
// global index i.
//
// Launch layout: 1D grid of 1D blocks. The kernel takes no element count and
// therefore performs no bounds check, so gridDim.x * blockDim.x must not exceed
// the number of elements in a, b and c; elements at or beyond that product are
// left untouched.
__global__ void addKernel(int *c, const int *a, const int *b)
{
    // Global index rather than threadIdx.x alone, so that a multi-block launch
    // covers distinct elements instead of every block rewriting the first
    // blockDim.x entries. For a single-block launch this is just threadIdx.x.
    const size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    c[i] = a[i] + b[i];
}
// Adds two 10000-element integer vectors once on the GPU (through addWithCuda)
// and once on the CPU, prints the elapsed time of each in milliseconds, and
// checks that both results agree.
//
// The GPU figure is taken from CUDA events recorded around the addWithCuda
// call, so it covers everything that call does, not only the kernel. The CPU
// figure is taken from a host steady clock around the CPU loop.
//
// Returns 0 on success, 1 if a CUDA call fails or the two results differ.
int main()
{
    const int arraySize = 10000;

    // Static storage keeps the four 40 KB buffers off the stack; static arrays
    // are zero-initialized.
    static int a[arraySize];
    static int b[arraySize];
    static int gpuSum[arraySize];
    static int cpuSum[arraySize];

    // Everything is declared before the first goto so that no jump to Cleanup
    // bypasses an initialization.
    cudaEvent_t start = 0;
    cudaEvent_t stop = 0;
    cudaError_t cudaStatus = cudaSuccess;
    float elapsedTime = 0.0f;
    double elapTime = 0.0;
    int exitCode = 1;
    int i;

    for (i = 0; i < arraySize; i++)
    {
        a[i] = i;
        b[i] = 300 + i;
    }

    cudaStatus = cudaEventCreate(&start);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaEventCreate failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }
    cudaStatus = cudaEventCreate(&stop);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaEventCreate failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }

    // --- GPU run, bracketed by events on the default stream ---
    cudaStatus = cudaEventRecord(start, 0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaEventRecord failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }
    cudaStatus = addWithCuda(gpuSum, a, b, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        goto Cleanup;
    }
    cudaStatus = cudaEventRecord(stop, 0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaEventRecord failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }
    // The stop event must have completed before its timestamp can be read.
    cudaStatus = cudaEventSynchronize(stop);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaEventSynchronize failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }
    cudaStatus = cudaEventElapsedTime(&elapsedTime, start, stop);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaEventElapsedTime failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }
    printf("Execution Time with GPU: %f \n", elapsedTime);

    // --- CPU run, timed with a host clock ---
    // CUDA events are timestamped on the device's stream timeline, so they are
    // not a reliable way to time host-only work. The result goes to its own
    // buffer so the GPU output survives for the comparison below.
    {
        const std::chrono::steady_clock::time_point cpuStart = std::chrono::steady_clock::now();
        for (i = 0; i < arraySize; i++)
        {
            cpuSum[i] = a[i] + b[i];
        }
        const std::chrono::steady_clock::time_point cpuStop = std::chrono::steady_clock::now();
        elapTime = std::chrono::duration<double, std::milli>(cpuStop - cpuStart).count();
    }
    printf("Execution Time with CPU: %f", elapTime);

    // --- Verify the GPU result against the CPU reference ---
    for (i = 0; i < arraySize; i++)
    {
        if (gpuSum[i] != cpuSum[i]) {
            fprintf(stderr, "Result mismatch at index %d: GPU %d, CPU %d\n", i, gpuSum[i], cpuSum[i]);
            goto Cleanup;
        }
    }
    exitCode = 0;

Cleanup:
    // Release the events on every path; a handle that was never created is still 0.
    if (stop != 0) {
        cudaEventDestroy(stop);
    }
    if (start != 0) {
        cudaEventDestroy(start);
    }

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }
    return exitCode;
}
// Bounds-checked element-wise add used by addWithCuda: c[i] = a[i] + b[i] for
// every i < n.
//
// Launch layout: 1D grid of 1D blocks. Threads whose global index is >= n do
// nothing, so the grid may be rounded up past n.
static __global__ void addKernelBounded(int *c, const int *a, const int *b, unsigned int n)
{
    // size_t arithmetic: blockIdx.x * blockDim.x can exceed 32 bits once the
    // grid is rounded up for an n close to UINT_MAX.
    const size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        c[i] = a[i] + b[i];
    }
}

// Helper function for using CUDA to add vectors in parallel.
//
// Copies the host arrays a and b (each `size` ints) to device 0, computes
// c[i] = a[i] + b[i] on the GPU, and copies the result back into the host
// array c. All device buffers are released before returning, on success and on
// failure alike.
//
// Returns cudaSuccess, or the status of the first CUDA call that failed (a
// message is also written to stderr). A size of 0 is a successful no-op.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    // All locals are declared up front so that no `goto Error` bypasses an
    // initialization.
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    cudaError_t cudaStatus;
    const size_t bytes = size * sizeof(int);
    const unsigned int threadsPerBlock = 256;
    // Ceil-division written so it cannot overflow for size near UINT_MAX.
    const unsigned int blocks = size / threadsPerBlock + ((size % threadsPerBlock) != 0 ? 1u : 0u);

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Nothing to add: a zero-block launch would be rejected, so stop here.
    // cudaStatus is cudaSuccess at this point and the device pointers are null.
    if (size == 0) {
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output).
    cudaStatus = cudaMalloc((void**)&dev_c, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_a, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_b, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }
    cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Launch enough fixed-size blocks to cover every element. A single block of
    // `size` threads is rejected as an invalid configuration as soon as size
    // exceeds the per-block thread limit. The bounds-checked kernel is used
    // because addKernel takes no element count and so cannot ignore the
    // surplus threads of the last block.
    addKernelBounded<<<blocks, threadsPerBlock>>>(dev_c, dev_a, dev_b, size);

    // Check for any errors launching the kernel
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    // Reached on success as well; cudaFree on a null pointer is a no-op.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);
    return cudaStatus;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement