Guest User

Untitled

a guest
Jan 16th, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.75 KB | None | 0 0
  1. #include <iomanip>
  2. #include <iostream>
  3. #include <cstdlib>
  4. #include <vector>
  5.  
  6. #include <cuda.h>
  7. #include <cudnn.h>
  8.  
  // Evaluates the CUDA runtime call `f` exactly once. If the returned status is
  // anything other than cudaSuccess, writes the numeric code, its textual
  // description and the call site to stderr, then terminates with exit status 1.
  // Wrapped in do { } while (0) so the macro behaves as a single statement
  // (safe inside un-braced if/else); the local uses a reserved-looking name so
  // it cannot capture an `err` variable appearing in the caller's expression.
  #define CUDA_CALL(f) do { \
    const cudaError_t cuda_call_err_ = (f); \
    if (cuda_call_err_ != cudaSuccess) { \
      std::cerr \
        << " Error occurred: " << cuda_call_err_ \
        << " (" << cudaGetErrorString(cuda_call_err_) << ") at " \
        << __FILE__ << ":" << __LINE__ << std::endl; \
      std::exit(1); \
    } \
  } while (0)
  17.  
  // Evaluates the cuDNN call `f` exactly once. If the returned status is
  // anything other than CUDNN_STATUS_SUCCESS, writes the numeric code, its
  // textual description and the call site to stderr, then terminates with
  // exit status 1.
  // Wrapped in do { } while (0) so the macro behaves as a single statement
  // (safe inside un-braced if/else); the local uses a reserved-looking name so
  // it cannot capture an `err` variable appearing in the caller's expression.
  #define CUDNN_CALL(f) do { \
    const cudnnStatus_t cudnn_call_err_ = (f); \
    if (cudnn_call_err_ != CUDNN_STATUS_SUCCESS) { \
      std::cerr \
        << " Error occurred: " << cudnn_call_err_ \
        << " (" << cudnnGetErrorString(cudnn_call_err_) << ") at " \
        << __FILE__ << ":" << __LINE__ << std::endl; \
      std::exit(1); \
    } \
  } while (0)
  26.  
  // Fill kernel: each launched thread stores `k` into the element of `px`
  // selected by its flat 1-D global index.
  // There is no bounds check, so the launch (gridDim.x * blockDim.x threads)
  // must not exceed the number of floats addressable through `px`.
  __global__ void dev_const(float *px, float k) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    px[idx] = k;
  }
  31.  
  // Iota kernel: each launched thread writes its own flat 1-D global index
  // (converted to float) into the matching element of `px`, producing
  // 0, 1, 2, ... across the launch.
  // There is no bounds check, so the launch (gridDim.x * blockDim.x threads)
  // must not exceed the number of floats addressable through `px`.
  __global__ void dev_iota(float *px) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    px[idx] = static_cast<float>(idx);
  }
  36.  
  // Copies an n x c x h x w block of floats from device memory to the host and
  // prints it to stdout: one "n=<i>, c=<j>:" header per (n, c) pair followed by
  // h rows of w right-aligned values (field width 4), then a trailing blank line.
  //
  //   data        device pointer to at least n*c*h*w contiguous floats
  //   n, c, h, w  extents of the four dimensions, iterated with w fastest
  //
  // Exits the process via CUDA_CALL if the device-to-host copy fails.
  void print(const float *data, int n, int c, int h, int w) {
    // Size the staging buffer from the actual element count (computed in
    // size_t so the product cannot overflow int) instead of a fixed capacity,
    // so large tensors cannot overrun the host buffer.
    const bool is_empty = n <= 0 || c <= 0 || h <= 0 || w <= 0;
    const std::size_t count =
        is_empty ? 0 : static_cast<std::size_t>(n) * c * h * w;

    std::vector<float> buffer(count);
    if (count > 0) {
      CUDA_CALL(cudaMemcpy(
          buffer.data(), data,
          count * sizeof(float),
          cudaMemcpyDeviceToHost));
    }

    std::size_t a = 0;  // running flat index into buffer
    for (int i = 0; i < n; ++i) {
      for (int j = 0; j < c; ++j) {
        std::cout << "n=" << i << ", c=" << j << ":" << std::endl;
        for (int k = 0; k < h; ++k) {
          for (int l = 0; l < w; ++l) {
            std::cout << std::setw(4) << std::right << buffer[a];
            ++a;
          }
          std::cout << std::endl;
        }
      }
    }
    std::cout << std::endl;
  }
  58.  
  // Runs a single cuDNN forward convolution on a small tensor and prints the
  // configuration, the input, the filter and the result.
  //
  // Steps: create the cuDNN handle; describe and allocate the input (iota
  // values) and the filter (all ones); configure the convolution; query the
  // output shape; pick an algorithm and allocate its workspace; run the
  // convolution; print everything; release all resources.
  // Any CUDA or cuDNN failure terminates the process through CUDA_CALL /
  // CUDNN_CALL. Returns 0 on success.
  int main() {
    cudnnHandle_t cudnn;
    CUDNN_CALL(cudnnCreate(&cudnn));

    // input
    const int in_n = 1;
    const int in_c = 1;
    const int in_h = 5;
    const int in_w = 5;
    std::cout << "in_n: " << in_n << std::endl;
    std::cout << "in_c: " << in_c << std::endl;
    std::cout << "in_h: " << in_h << std::endl;
    std::cout << "in_w: " << in_w << std::endl;
    std::cout << std::endl;

    cudnnTensorDescriptor_t in_desc;
    CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc));
    CUDNN_CALL(cudnnSetTensor4dDescriptor(
        in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
        in_n, in_c, in_h, in_w));

    float *in_data = nullptr;
    CUDA_CALL(cudaMalloc(
        &in_data, in_n * in_c * in_h * in_w * sizeof(float)));

    // filter
    const int filt_k = 1;
    const int filt_c = 1;
    const int filt_h = 2;
    const int filt_w = 2;
    std::cout << "filt_k: " << filt_k << std::endl;
    std::cout << "filt_c: " << filt_c << std::endl;
    std::cout << "filt_h: " << filt_h << std::endl;
    std::cout << "filt_w: " << filt_w << std::endl;
    std::cout << std::endl;

    cudnnFilterDescriptor_t filt_desc;
    CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc));
    CUDNN_CALL(cudnnSetFilter4dDescriptor(
        filt_desc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
        filt_k, filt_c, filt_h, filt_w));

    float *filt_data = nullptr;
    CUDA_CALL(cudaMalloc(
        &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float)));

    // convolution
    const int pad_h = 1;
    const int pad_w = 1;
    const int str_h = 1;
    const int str_w = 1;
    const int dil_h = 1;
    const int dil_w = 1;
    std::cout << "pad_h: " << pad_h << std::endl;
    std::cout << "pad_w: " << pad_w << std::endl;
    std::cout << "str_h: " << str_h << std::endl;
    std::cout << "str_w: " << str_w << std::endl;
    std::cout << "dil_h: " << dil_h << std::endl;
    std::cout << "dil_w: " << dil_w << std::endl;
    std::cout << std::endl;

    cudnnConvolutionDescriptor_t conv_desc;
    CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc));
    CUDNN_CALL(cudnnSetConvolution2dDescriptor(
        conv_desc,
        pad_h, pad_w, str_h, str_w, dil_h, dil_w,
        CUDNN_CONVOLUTION, CUDNN_DATA_FLOAT));

    // output: let cuDNN derive the output shape from the three descriptors
    int out_n;
    int out_c;
    int out_h;
    int out_w;

    CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim(
        conv_desc, in_desc, filt_desc,
        &out_n, &out_c, &out_h, &out_w));

    std::cout << "out_n: " << out_n << std::endl;
    std::cout << "out_c: " << out_c << std::endl;
    std::cout << "out_h: " << out_h << std::endl;
    std::cout << "out_w: " << out_w << std::endl;
    std::cout << std::endl;

    cudnnTensorDescriptor_t out_desc;
    CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc));
    CUDNN_CALL(cudnnSetTensor4dDescriptor(
        out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
        out_n, out_c, out_h, out_w));

    float *out_data = nullptr;
    CUDA_CALL(cudaMalloc(
        &out_data, out_n * out_c * out_h * out_w * sizeof(float)));

    // algorithm
    // NOTE(review): cudnnGetConvolutionForwardAlgorithm and the
    // CUDNN_CONVOLUTION_FWD_PREFER_FASTEST preference are, as far as I know,
    // removed in cuDNN 8 (replacement: cudnnGetConvolutionForwardAlgorithm_v7)
    // -- confirm against the cuDNN version this sample must build with.
    cudnnConvolutionFwdAlgo_t algo;
    CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm(
        cudnn,
        in_desc, filt_desc, conv_desc, out_desc,
        CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo));

    std::cout << "Convolution algorithm: " << algo << std::endl;
    std::cout << std::endl;

    // workspace
    size_t ws_size;
    CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize(
        cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size));

    float *ws_data = nullptr;
    CUDA_CALL(cudaMalloc(&ws_data, ws_size));

    std::cout << "Workspace size: " << ws_size << std::endl;
    std::cout << std::endl;

    // perform
    float alpha = 1.f;
    float beta = 0.f;

    // The fill kernels have no bounds check: blocks * threads must equal the
    // element count exactly (h*w blocks of n*c threads each).
    dev_iota<<<in_w * in_h, in_n * in_c>>>(in_data);
    // Kernel launches return no status; query it explicitly so an invalid
    // launch configuration is reported instead of silently leaving the
    // buffer uninitialized.
    CUDA_CALL(cudaGetLastError());

    dev_const<<<filt_w * filt_h, filt_k * filt_c>>>(filt_data, 1.f);
    CUDA_CALL(cudaGetLastError());

    CUDNN_CALL(cudnnConvolutionForward(
        cudnn,
        &alpha, in_desc, in_data, filt_desc, filt_data,
        conv_desc, algo, ws_data, ws_size,
        &beta, out_desc, out_data));
    // Surface asynchronous execution errors here rather than at the first
    // device-to-host copy inside print().
    CUDA_CALL(cudaDeviceSynchronize());

    // results
    std::cout << "in_data:" << std::endl;
    print(in_data, in_n, in_c, in_h, in_w);

    std::cout << "filt_data:" << std::endl;
    print(filt_data, filt_k, filt_c, filt_h, filt_w);

    std::cout << "out_data:" << std::endl;
    print(out_data, out_n, out_c, out_h, out_w);

    // finalizing: release in reverse order of creation
    CUDA_CALL(cudaFree(ws_data));
    CUDA_CALL(cudaFree(out_data));
    CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc));
    CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc));
    CUDA_CALL(cudaFree(filt_data));
    CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc));
    CUDA_CALL(cudaFree(in_data));
    CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc));
    CUDNN_CALL(cudnnDestroy(cudnn));
    return 0;
  }
Add Comment
Please, Sign In to add comment