Advertisement
Guest User

Untitled

a guest
Jan 21st, 2018
54
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.40 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <sys/time.h>
  4.  
  // Number of floats in hArray; also the upper bound of the kernel's index check.
  5. #define N 1024
  // Threads per block used when launching calcDevice.
  6. #define BLOCK_SIZE 1024
  7.  
  // Host-side input array; filled with random values in findMax().
  8. float hArray[N];
  // Device buffer allocated in prologue() to hold a copy of hArray.
  9. float *dArray;
  // Number of blocks in the launch grid; computed in findMax().
  10. int blocks;
  11.  
  // Host pointer to the float that receives the maximum; findMax() points it
  // at a local variable before calling prologue().
  12. float* result;
  // Device buffer allocated in prologue() that the kernel updates atomically.
  13. float* dResult;
  14.  
  /*
   * Reports a failed CUDA runtime call on stderr.
   *
   * status: return code of the call.
   * what:   short description of the call, used in the message.
   * Returns true when the call succeeded, false otherwise.
   */
  static bool checkCudaCall(cudaError_t status, const char *what) {
      if (status != cudaSuccess) {
          fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
          return false;
      }
      return true;
  }

  /*
   * Allocates dArray and dResult on the device and uploads hArray and the
   * single float that `result` points to.
   *
   * `result` must already point at a valid host float when this is called.
   * Terminates the program if any allocation or upload fails, because the
   * kernel launch that follows would otherwise run on invalid pointers.
   */
  void prologue(void) {
      /* sizeof(*result) is one float. The previous sizeof(result) was the size
       * of the pointer itself, which over-read the host float on 64-bit builds. */
      bool ok =
          checkCudaCall(cudaMalloc((void**) &dArray, sizeof(hArray)),
                        "cudaMalloc(dArray)") &&
          checkCudaCall(cudaMemcpy(dArray, hArray, sizeof(hArray), cudaMemcpyHostToDevice),
                        "cudaMemcpy(hArray -> dArray)") &&
          checkCudaCall(cudaMalloc((void**) &dResult, sizeof(*result)),
                        "cudaMalloc(dResult)") &&
          checkCudaCall(cudaMemcpy(dResult, result, sizeof(*result), cudaMemcpyHostToDevice),
                        "cudaMemcpy(result -> dResult)");

      if (!ok) {
          exit(EXIT_FAILURE);
      }
  }

  /*
   * Downloads dArray into hArray and dResult into the float that `result`
   * points to, then releases both device buffers.
   *
   * Failures are reported on stderr but do not stop the cleanup: both buffers
   * are always passed to cudaFree, and the device pointers are cleared so a
   * stale handle cannot be reused.
   */
  void epilogue(void) {
      checkCudaCall(cudaMemcpy(hArray, dArray, sizeof(hArray), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(dArray -> hArray)");
      checkCudaCall(cudaFree(dArray), "cudaFree(dArray)");
      dArray = NULL;

      /* Copy exactly one float; copying sizeof(result) bytes would write past
       * the float that `result` points to. */
      checkCudaCall(cudaMemcpy(result, dResult, sizeof(*result), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(dResult -> result)");
      checkCudaCall(cudaFree(dResult), "cudaFree(dResult)");
      dResult = NULL;
  }
  28.  
  /*
   * Kernel: folds the first N elements of A into *result so that *result ends
   * up holding the maximum of its initial value and those elements.
   *
   * Launch layout: one-dimensional grid and blocks; each thread handles the
   * single element at its global index, and threads with an index >= N do
   * nothing.
   *
   * A:      device pointer to at least N floats.
   * result: device pointer to one float, initialised by the caller before the
   *         launch.
   *
   * NaN inputs are not treated specially.
   */
  __global__ void calcDevice(float *A, float* result) {
      int x = blockDim.x * blockIdx.x + threadIdx.x;

      if (x < N) {
          float value = A[x];
          int bits = __float_as_int(value);

          if (bits >= 0) {
              /* Sign bit clear: for non-negative floats the signed-integer
               * order of the bit patterns matches the float order, and any
               * such pattern compares above every negative float's pattern. */
              atomicMax((int*) result, bits);
          } else {
              /* Sign bit set (negative values and -0.0f): as unsigned integers
               * these patterns grow as the float gets more negative, and all of
               * them compare above every non-negative pattern, so the unsigned
               * minimum selects the larger float. */
              atomicMin((unsigned int*) result, __float_as_uint(value));
          }
      }
  }
  36.  
  /*
   * Fills hArray with pseudo-random values in [0, 1], runs calcDevice over it
   * and prints the maximum.
   *
   * On a launch or execution error the device buffers are still released via
   * epilogue(), the error is reported on stderr and no maximum is printed.
   */
  void findMax(void) {
      srand(time(NULL));
      for (int i = 0; i < N; i++) {
          hArray[i] = (float) rand() / RAND_MAX;
      }

      /* 0 is a valid starting value because every generated element is >= 0. */
      float hostMax = 0.0F;
      result = &hostMax;

      prologue();

      /* Ceiling division so a partial trailing block is still launched. */
      blocks = (N + BLOCK_SIZE - 1) / BLOCK_SIZE;

      calcDevice<<<blocks, BLOCK_SIZE>>>(dArray, dResult);

      /* A bad launch configuration is reported by cudaGetLastError(); faults
       * during execution surface at the synchronisation that follows. */
      cudaError_t status = cudaGetLastError();
      if (status == cudaSuccess) {
          /* cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize. */
          status = cudaDeviceSynchronize();
      }

      epilogue();

      /* hostMax goes out of scope on return; do not leave the global pointing at it. */
      result = NULL;

      if (status != cudaSuccess) {
          fprintf(stderr, "calcDevice failed: %s\n", cudaGetErrorString(status));
          return;
      }

      printf("Max = %f\n", hostMax);
  }
  58.  
  /*
   * Program entry point: verifies that a CUDA device is present, then runs
   * findMax().
   *
   * Returns 0 on success, 1 when no device is available or the device query
   * itself fails.
   */
  int main(int argc, char** argv) {
      (void) argc;
      (void) argv;

      /* Initialised so the value is defined even if the query fails. */
      int devCnt = 0;
      cudaError_t status = cudaGetDeviceCount(&devCnt);

      if (status == cudaErrorNoDevice || (status == cudaSuccess && devCnt == 0)) {
          /* Not an errno condition, so fprintf is used rather than perror,
           * which would append an unrelated errno description. */
          fprintf(stderr, "No CUDA devices available -- exiting.\n");
          return 1;
      }
      if (status != cudaSuccess) {
          fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(status));
          return 1;
      }

      findMax();

      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement