Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.35 KB | None | 0 0
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include <stdio.h>
  6.  
  7. cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);
  8.  
  9.  
  /**
   * Element-wise integer vector add: c[i] = a[i] + b[i].
   *
   * Launch layout: 1D grid of 1D blocks. Each thread handles the single element
   * at global index blockIdx.x * blockDim.x + threadIdx.x. No shared memory is
   * used.
   *
   * @param c  Output array (dereferenced on the device).
   * @param a  First input array (dereferenced on the device).
   * @param b  Second input array (dereferenced on the device).
   * @param n  Number of valid elements; threads whose index is >= n do nothing.
   *           The default is the largest unsigned value, so a three-argument
   *           launch performs no effective bounds check (the historical
   *           behaviour) and the caller must then launch no more threads than
   *           there are elements.
   */
  __global__ void addKernel(int *c, const int *a, const int *b, unsigned int n = 0xFFFFFFFFu)
  {
      // Global index: for a single-block launch blockIdx.x is 0, so this is
      // the same value as threadIdx.x.
      const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) {
          c[i] = a[i] + b[i];
      }
  }
  15.  
  // Reports a failed CUDA runtime call on stderr.
  // Returns true when `status` is cudaSuccess, false otherwise.
  static bool cudaCallOk(cudaError_t status, const char *what)
  {
      if (status == cudaSuccess) {
          return true;
      }
      fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
      return false;
  }

  /**
   * Adds two 10000-element integer vectors once through addWithCuda and once
   * with a plain host loop, prints the elapsed time of each, and checks that
   * both produce the same result.
   *
   * Both intervals are measured with CUDA events recorded on stream 0. The
   * "GPU" interval brackets the whole addWithCuda call, so it covers everything
   * that helper does, not only the kernel.
   *
   * Returns 0 on success; 1 if any CUDA call fails or the results differ.
   */
  int main()
  {
      const int arraySize = 10000;

      // Static storage keeps ~160 KB of buffers off the stack; static arrays
      // start zero-initialised.
      static int a[arraySize];
      static int b[arraySize];
      static int c[arraySize];         // result produced via addWithCuda
      static int expected[arraySize];  // reference result computed on the host

      // Everything is declared before the first `goto Cleanup` so no jump
      // crosses an initialisation.
      cudaEvent_t start = NULL;
      cudaEvent_t stop = NULL;
      cudaError_t cudaStatus = cudaSuccess;
      float elapsedTime = 0.0f;  // time for the addWithCuda path, milliseconds
      float elapTime = 0.0f;     // time for the host loop, milliseconds
      int exitCode = 1;

      for (int i = 0; i < arraySize; i++)
      {
          a[i] = i;
          b[i] = 300 + i;
      }

      if (!cudaCallOk(cudaEventCreate(&start), "cudaEventCreate") ||
          !cudaCallOk(cudaEventCreate(&stop), "cudaEventCreate")) {
          goto Cleanup;
      }

      // ---- Timed run through addWithCuda ----
      if (!cudaCallOk(cudaEventRecord(start, 0), "cudaEventRecord")) {
          goto Cleanup;
      }

      cudaStatus = addWithCuda(c, a, b, arraySize);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "addWithCuda failed!");
          goto Cleanup;
      }

      if (!cudaCallOk(cudaEventRecord(stop, 0), "cudaEventRecord") ||
          !cudaCallOk(cudaEventSynchronize(stop), "cudaEventSynchronize") ||
          !cudaCallOk(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime")) {
          goto Cleanup;
      }
      printf("Execution Time with GPU: %f \n", elapsedTime);

      // ---- Timed run of the host loop ----
      if (!cudaCallOk(cudaEventRecord(start, 0), "cudaEventRecord")) {
          goto Cleanup;
      }

      // Written to a separate buffer so the GPU result in `c` survives for
      // the comparison below.
      for (int i = 0; i < arraySize; i++)
      {
          expected[i] = a[i] + b[i];
      }

      if (!cudaCallOk(cudaEventRecord(stop, 0), "cudaEventRecord") ||
          !cudaCallOk(cudaEventSynchronize(stop), "cudaEventSynchronize") ||
          !cudaCallOk(cudaEventElapsedTime(&elapTime, start, stop), "cudaEventElapsedTime")) {
          goto Cleanup;
      }
      printf("Execution Time with CPU: %f", elapTime);

      // ---- Validate the GPU result against the host reference ----
      for (int i = 0; i < arraySize; i++)
      {
          if (c[i] != expected[i]) {
              // Leading newline: the CPU timing line above does not end with one.
              fprintf(stderr, "\nResult mismatch at index %d: GPU %d, CPU %d\n",
                      i, c[i], expected[i]);
              goto Cleanup;
          }
      }

      exitCode = 0;

  Cleanup:
      // Release the events on every path; failures here are not actionable.
      if (start != NULL) {
          cudaEventDestroy(start);
      }
      if (stop != NULL) {
          cudaEventDestroy(stop);
      }

      // cudaDeviceReset must be called before exiting in order for profiling and
      // tracing tools such as Nsight and Visual Profiler to show complete traces.
      cudaStatus = cudaDeviceReset();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceReset failed!");
          return 1;
      }

      return exitCode;
  }
  84.  
  /**
   * Helper function for using CUDA to add vectors in parallel.
   *
   * Selects device 0, allocates three device buffers, copies `a` and `b` to the
   * device, runs addKernel over all `size` elements, and copies the result back
   * into `c`. The device buffers are freed on every path.
   *
   * The work is issued as a sequence of single-block launches, each no larger
   * than the device's threads-per-block limit and each given pointers offset to
   * the start of its sub-range. A single `<<<1, size>>>` launch is an invalid
   * configuration once `size` exceeds that limit (or is 0).
   *
   * @param c     Host output array with room for `size` ints.
   * @param a     Host input array of `size` ints.
   * @param b     Host input array of `size` ints.
   * @param size  Number of elements; 0 is allowed and launches nothing.
   * @return cudaSuccess, or the status of the first CUDA call that failed
   *         (a diagnostic is also written to stderr).
   */
  cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
  {
      // All locals are declared up front so that no `goto Error` jumps over an
      // initialisation.
      int *dev_a = 0;
      int *dev_b = 0;
      int *dev_c = 0;
      int maxThreadsPerBlock = 0;
      unsigned int offset = 0;
      const size_t bytes = size * sizeof(int);
      cudaError_t cudaStatus;

      // Choose which GPU to run on, change this on a multi-GPU system.
      cudaStatus = cudaSetDevice(0);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
          goto Error;
      }

      // Largest block this device accepts; bounds the size of each launch below.
      cudaStatus = cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceGetAttribute failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Error;
      }

      // Allocate GPU buffers for three vectors (two input, one output).
      cudaStatus = cudaMalloc((void**)&dev_c, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      cudaStatus = cudaMalloc((void**)&dev_a, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      cudaStatus = cudaMalloc((void**)&dev_b, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      // Copy input vectors from host memory to GPU buffers.
      cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Error;
      }

      cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Error;
      }

      // Launch the kernel one sub-range at a time, one thread per element.
      // Each launch uses exactly one block, so a thread's index within its
      // block is also its index within the sub-range it was handed.
      while (offset < size) {
          unsigned int chunk = size - offset;
          if (chunk > (unsigned int)maxThreadsPerBlock) {
              chunk = (unsigned int)maxThreadsPerBlock;
          }

          addKernel<<<1, chunk>>>(dev_c + offset, dev_a + offset, dev_b + offset);

          // Check for any errors launching the kernel
          cudaStatus = cudaGetLastError();
          if (cudaStatus != cudaSuccess) {
              fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
              goto Error;
          }

          offset += chunk;
      }

      // cudaDeviceSynchronize waits for the kernel to finish, and returns
      // any errors encountered during the launch.
      cudaStatus = cudaDeviceSynchronize();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
          goto Error;
      }

      // Copy output vector from GPU buffer to host memory.
      cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Error;
      }

  Error:
      // Reached on success as well; cudaFree accepts null pointers.
      cudaFree(dev_c);
      cudaFree(dev_a);
      cudaFree(dev_b);

      return cudaStatus;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement