Advertisement
Guest User

Untitled

a guest
Apr 25th, 2016
137
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.80 KB | None | 0 0
  1. # (Approximate command-line. Settings inherited from host are not visible below.)
  2. # (Please see the output window after a build for the full command-line)
  3.  
  4. # Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
  5. set CUDAFE_FLAGS=--sdk_dir "C:\Program Files (x86)\Windows Kits\8.1\"
  6. "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin\nvcc.exe" --use-local-env --cl-version 2013 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin" -I"D:\Software\Dropbox\Skola\Master's Courses\DH2323 Computer Graphics and Interaction\Project\CUDA_udacity\CUDA_udacity\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -cudart static -o Debug\%(Filename)%(Extension).obj "%(FullPath)"
  7.  
  8. # Runtime API (NVCC Compilation Type is hybrid object or .c file)
  9. set CUDAFE_FLAGS=--sdk_dir "C:\Program Files (x86)\Windows Kits\8.1\"
  10. "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin\nvcc.exe" --use-local-env --cl-version 2013 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin" -I"D:\Software\Dropbox\Skola\Master's Courses\DH2323 Computer Graphics and Interaction\Project\CUDA_udacity\CUDA_udacity\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -cudart static -g -Xcompiler "/EHsc /nologo /Od /Zi " -o Debug\%(Filename)%(Extension).obj "%(FullPath)"
  11.  
  12.  
  13. //source code below
  14. //test.cu
  15. #include "test.h"
  16.  
  /*
   * Element-wise sum kernel: d_out[i] = d_a[i] + d_b[i], one element per thread.
   *
   * Launch layout: the element index is threadIdx.x only, so a single
   * one-dimensional block is expected and the block size of the launch decides
   * how many elements are processed. There is no length parameter and no bounds
   * check, so all three arrays must hold at least blockDim.x elements.
   *
   * d_out  array receiving the sums (written by the kernel)
   * d_a    first operand array (read only)
   * d_b    second operand array (read only)
   */
  template <typename T>
  __global__ void add(T* d_out, const T* d_a, const T* d_b)
  {
      const int i = threadIdx.x;

      d_out[i] = d_a[i] + d_b[i];
  }

  // test.h only declares this template, so a translation unit that launches it
  // (main.cu does so with float arrays) cannot instantiate it on its own and the
  // link fails with an unresolved symbol. Emit the float instantiation here, in
  // the file that holds the definition.
  // NOTE(review): this relies on the kernel's host-side stub having external
  // linkage; confirm for the toolkit version and nvcc flags in use.
  template __global__ void add<float>(float* d_out, const float* d_a, const float* d_b);
  28.  
  29. //test.h
  30. #include <stdio.h>
  31. #include <iostream>
  32. #include <time.h>
  33.  
  34. #include <cuda.h>
  35. #include "cuda_runtime.h"
  36. #include "device_launch_parameters.h"
  37.  
  // Declaration of the element-wise add kernel template; the definition that
  // follows the "//test.cu" marker computes d_out[i] = d_a[i] + d_b[i] with
  // i = threadIdx.x. Includers see only this declaration, so a translation unit
  // that launches add<T> cannot instantiate the template itself and depends on
  // the file holding the definition to emit that instantiation.
  38. template <typename T>
  39. __global__ void add(T* d_out, const T* d_a, const T* d_b);
  40.  
  41. //main.cu
  42. #include <stdio.h>
  43. #include <iostream>
  44. #include <time.h>
  45.  
  46. #include <cuda.h>
  47. #include "cuda_runtime.h"
  48. #include "device_launch_parameters.h"
  49.  
  50. #include "test.h"
  51.  
  52. using namespace std;
  53.  
  // Prints a diagnostic on stderr for a failed CUDA runtime call.
  // Returns true when status is cudaSuccess, false otherwise.
  static bool checkCuda(cudaError_t status, const char* what)
  {
      if (status == cudaSuccess)
          return true;

      fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
      return false;
  }

  /*
   * Fills two host arrays, adds them on the device with the add kernel, copies
   * the result back and compares it against a host-side reference.
   *
   * Every CUDA runtime call is checked; on the first failure the remaining
   * device work is skipped, the device buffers are still released, and the
   * process exits with status 1. Waits for a key press before returning, as
   * the original program did.
   */
  int main(int argc, char **argv)
  {
      (void)argc;
      (void)argv;

      const int ARRAY_SIZE = 1024;
      const int ARRAY_BYTES = sizeof(float) * ARRAY_SIZE;

      float h_a[ARRAY_SIZE];
      float h_b[ARRAY_SIZE];
      float h_out[ARRAY_SIZE];

      for (int i = 0; i < ARRAY_SIZE; i++)
      {
          h_a[i] = i / 2.0f;
          h_b[i] = (i + 1) / 3.0f;
      }

      // Start from null so the cudaFree calls below are safe even when an
      // allocation failed or was never attempted.
      float* d_a = 0;
      float* d_b = 0;
      float* d_out = 0;

      //clock_t t;

      //t = clock();
      // && short-circuits: after the first failure no further calls are made.
      bool ok = checkCuda(cudaMalloc(&d_a, ARRAY_BYTES), "cudaMalloc(d_a)")
             && checkCuda(cudaMalloc(&d_b, ARRAY_BYTES), "cudaMalloc(d_b)")
             && checkCuda(cudaMalloc(&d_out, ARRAY_BYTES), "cudaMalloc(d_out)")
             && checkCuda(cudaMemcpy(d_a, h_a, ARRAY_BYTES, cudaMemcpyHostToDevice), "cudaMemcpy(d_a)")
             && checkCuda(cudaMemcpy(d_b, h_b, ARRAY_BYTES, cudaMemcpyHostToDevice), "cudaMemcpy(d_b)");

      if (ok)
      {
          // One block of ARRAY_SIZE threads: the kernel indexes by threadIdx.x only.
          add <<<1, ARRAY_SIZE>>>(d_out, d_a, d_b);

          // A launch does not return a status; a rejected configuration (for
          // example too many resources for a 1024-thread block in a debug
          // build) is only visible through cudaGetLastError(). The blocking
          // copy that follows reports errors raised while the kernel ran.
          ok = checkCuda(cudaGetLastError(), "add kernel launch")
            && checkCuda(cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost), "cudaMemcpy(h_out)");
      }

      //cout << clock() - t << endl;

      if (ok)
      {
          int mismatches = 0;
          for (int i = 0; i < ARRAY_SIZE; i++)
          {
              // expected is always positive here (h_b[0] is 1/3), so a relative
              // tolerance scaled by expected is well defined.
              const float expected = h_a[i] + h_b[i];
              float diff = h_out[i] - expected;
              if (diff < 0.0f)
                  diff = -diff;

              // Written as a negated <= so that a NaN result counts as a mismatch.
              if (!(diff <= 1e-5f * expected))
                  ++mismatches;
          }

          printf("add kernel: %d of %d results differ from the host reference\n",
                 mismatches, ARRAY_SIZE);
          ok = (mismatches == 0);
      }

      // Release device memory on every path; the free is evaluated first so it
      // always runs, and a failure is folded into the exit status.
      ok = checkCuda(cudaFree(d_out), "cudaFree(d_out)") && ok;
      ok = checkCuda(cudaFree(d_b), "cudaFree(d_b)") && ok;
      ok = checkCuda(cudaFree(d_a), "cudaFree(d_a)") && ok;

      std::cin.get();
      return ok ? 0 : 1;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement