Advertisement
Guest User

Untitled

a guest
Dec 15th, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.56 KB | None | 0 0
  1. diff --git a/caffe2/core/common_cudnn.h b/caffe2/core/common_cudnn.h
  2. index f93f333..5655b33 100644
  3. --- a/caffe2/core/common_cudnn.h
  4. +++ b/caffe2/core/common_cudnn.h
  5. @@ -31,6 +31,12 @@
  6. static_assert(
  7. CUDNN_VERSION >= 5000,
  8. "Caffe2 requires cudnn version 5.0 or above.");
  9. +
  10. +#if CUDNN_VERSION < 6000
  11. +#pragma message "CUDNN version under 6.0 is supported at best effort."
  12. +#pragma message "We strongly encourage you to move to 6.0 and above."
  13. +#pragma message "This message is intended to annoy you enough to update."
  14. +#endif // CUDNN_VERSION < 6000
  15.  
  16. #define CUDNN_VERSION_MIN(major, minor, patch) \
  17. (CUDNN_VERSION >= ((major) * 1000 + (minor) * 100 + (patch)))
  18. @@ -136,6 +142,7 @@ class cudnnTypeWrapper<float> {
  19. }
  20. };
  21.  
  22. +#if CUDNN_VERSION_MIN(6, 0, 0)
  23. template <>
  24. class cudnnTypeWrapper<int> {
  25. public:
  26. @@ -151,6 +158,7 @@ class cudnnTypeWrapper<int> {
  27. return &v;
  28. }
  29. };
  30. +#endif // CUDNN_VERSION_MIN(6, 0, 0)
  31.  
  32. template <>
  33. class cudnnTypeWrapper<double> {
  34. diff --git a/caffe2/operators/pool_op_cudnn.cu b/caffe2/operators/pool_op_cudnn.cu
  35. index 5c18c4a..bfe491d 100644
  36. --- a/caffe2/operators/pool_op_cudnn.cu
  37. +++ b/caffe2/operators/pool_op_cudnn.cu
  38. @@ -134,8 +134,11 @@ class CuDNNPoolOp : public ConvPoolOpBase<CUDAContext> {
  39. CUDNN_ENFORCE(cudnnCreatePoolingDescriptor(&pooling_desc_));
  40. // Figure out the pooling descriptor.
  41. if (operator_def.type().substr(0, 7) == "MaxPool") {
  42. -#if CUDNN_VERSION_MIN(6,0,0)
  43. - mode_ = CUDNN_POOLING_MAX_DETERMINISTIC;
  44. + bool deterministic =
  45. + OperatorBase::GetSingleArgument<bool>("deterministic", false);
  46. +#if CUDNN_VERSION_MIN(6, 0, 0)
  47. + mode_ =
  48. + deterministic ? CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX;
  49. #else
  50. mode_ = CUDNN_POOLING_MAX;
  51. #endif
  52. @@ -253,15 +256,17 @@ class CuDNNPoolOp : public ConvPoolOpBase<CUDAContext> {
  53. }
  54. }
  55. // Carry out the pooling computation.
  56. + const T* Xdata = X.template data<T>();
  57. + T* Ydata = Y->template mutable_data<T>();
  58. CUDNN_ENFORCE(cudnnPoolingForward(
  59. cudnn_wrapper_.inline_cudnn_handle(),
  60. pooling_desc_,
  61. cudnnTypeWrapper<T>::kOne(),
  62. bottom_desc_,
  63. - X.template data<T>(),
  64. + Xdata,
  65. cudnnTypeWrapper<T>::kZero(),
  66. top_desc_,
  67. - Y->template mutable_data<T>()));
  68. + Ydata));
  69. return true;
  70. }
  71.  
  72. @@ -382,8 +387,12 @@ class CuDNNPoolGradientOp : public ConvPoolOpBase<CUDAContext> {
  73. dX->mutable_data<float>());
  74. return true;
  75. }
  76. +#if CUDNN_VERSION_MIN(6, 0, 0)
  77. if (mode_ == CUDNN_POOLING_MAX ||
  78. mode_ == CUDNN_POOLING_MAX_DETERMINISTIC) {
  79. +#else
  80. + if (mode_ == CUDNN_POOLING_MAX) {
  81. +#endif
  82. global_maxpool_backward_NCHW<float>
  83. <<<CAFFE_GET_BLOCKS(dX->size()),
  84. CAFFE_CUDA_NUM_THREADS,
  85. @@ -449,19 +458,24 @@ class CuDNNPoolGradientOp : public ConvPoolOpBase<CUDAContext> {
  86. }
  87. }
  88. // Carry out the pooling computation.
  89. + const T* Xdata = X.template data<T>();
  90. + const T* Ydata = Y.template data<T>();
  91. + const T* dYdata = dY.template data<T>();
  92. + T* dXdata = dX->template mutable_data<T>();
  93. +
  94. CUDNN_ENFORCE(cudnnPoolingBackward(
  95. cudnn_wrapper_.inline_cudnn_handle(),
  96. pooling_desc_,
  97. cudnnTypeWrapper<T>::kOne(),
  98. top_desc_,
  99. - Y.template data<T>(),
  100. + Ydata,
  101. top_desc_,
  102. - dY.template data<T>(),
  103. + dYdata,
  104. bottom_desc_,
  105. - X.template data<T>(),
  106. + Xdata,
  107. cudnnTypeWrapper<T>::kZero(),
  108. bottom_desc_,
  109. - dX->template mutable_data<T>()));
  110. + dXdata));
  111. return true;
  112. }
  113.  
  114. @@ -493,7 +507,7 @@ class CuDNNPoolGradientOp : public ConvPoolOpBase<CUDAContext> {
  115.  
  116. // Input: X, Y, dY
  117. // Output: dX
  118. - INPUT_TAGS(IN, OUT, OUT_GRAD);
  119. + // INPUT_TAGS(IN, OUT, OUT_GRAD);
  120. };
  121.  
  122. namespace {
  123. diff --git a/caffe2/utils/GpuBitonicSort.cuh b/caffe2/utils/GpuBitonicSort.cuh
  124. index f52bb50..45cb298 100644
  125. --- a/caffe2/utils/GpuBitonicSort.cuh
  126. +++ b/caffe2/utils/GpuBitonicSort.cuh
  127. @@ -6,6 +6,19 @@
  128.  
  129. namespace caffe2 {
  130.  
  131. +// Returns true if the given integer is a power of 2 (positive values only).
  132. +// Note(jiayq): Windows reported an error with the function form, per
  133. +// https://github.com/caffe2/caffe2/issues/997
  134. +// so under _MSC_VER this is defined as a macro instead.
  135. +#ifdef _MSC_VER
  136. +#define integerIsPowerOf2(v) ((v) && !((v) & ((v) - 1)))
  137. +#else // _MSC_VER
  138. +template <typename T>
  139. +constexpr bool integerIsPowerOf2(T v) {
  140. + return (v && !(v & (v - 1)));
  141. +}
  142. +#endif // _MSC_VER
  143. +
  144. /// The maximum in-block bitonic sort we support
  145. constexpr int kMaxBitonicSortSize = 4096;
  146.  
  147. @@ -39,9 +52,9 @@ __device__ inline void bitonicSort(K* keys,
  148. // Assume the sort is taking place in shared memory
  149. // static_assert(Power2SortSize * (sizeof(K) + sizeof(V)) < 32768,
  150. // "sort data too large (>32768 bytes)");
  151. - static_assert(math::integerIsPowerOf2(Power2SortSize),
  152. + static_assert(integerIsPowerOf2(Power2SortSize),
  153. "sort size must be power of 2");
  154. - static_assert(math::integerIsPowerOf2(ThreadsPerBlock),
  155. + static_assert(integerIsPowerOf2(ThreadsPerBlock),
  156. "threads in block must be power of 2");
  157.  
  158. // If what we are sorting is too small, then not all threads
  159. @@ -107,7 +120,7 @@ __device__ inline void warpBitonicSort(K* keys,
  160. // Smaller sorts should use a warp shuffle sort
  161. static_assert(Power2SortSize > kWarpSize,
  162. "sort not large enough");
  163. - static_assert(math::integerIsPowerOf2(Power2SortSize),
  164. + static_assert(integerIsPowerOf2(Power2SortSize),
  165. "sort size must be power of 2");
  166. static_assert(Power2SortSize <= kMaxBitonicSortSize,
  167. "sort size <= 4096 only supported");
  168. diff --git a/caffe2/utils/math.h b/caffe2/utils/math.h
  169. index 487a77d..3da68cd 100644
  170. --- a/caffe2/utils/math.h
  171. +++ b/caffe2/utils/math.h
  172. @@ -468,19 +468,6 @@ constexpr T roundUp(T a, T b) {
  173. return divUp<T>(a, b) * b;
  174. }
  175.  
  176. -// Returns true if the given integer type is a power-of-2 (positive only)
  177. -// Note(jiayq): windows reported an error per
  178. -// https://github.com/caffe2/caffe2/issues/997
  179. -// and as a result will make it a macro.
  180. -#ifdef _MSC_VER
  181. -#define integerIsPowerOf2(v) ((v) && !((v) & ((v) - 1)))
  182. -#else // _MSC_VER
  183. -template <typename T>
  184. -constexpr bool integerIsPowerOf2(T v) {
  185. - return (v && !(v & (v - 1)));
  186. -}
  187. -#endif // _MSC_VER
  188. -
  189. // Returns log2(n) for a positive integer type
  190. template <typename T>
  191. constexpr int integerLog2(T n, int p = 0) {
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement