Advertisement
Guest User

Untitled

a guest
May 22nd, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.04 KB | None | 0 0
  1. #ifndef PYTHONIC_SCIPY_SIGNAL_CONVOLVE2D_HPP
  2. #define PYTHONIC_SCIPY_SIGNAL_CONVOLVE2D_HPP
  3.  
  4. #include "pythonic/include/scipy/signal/convolve2d.hpp"
  5. #include "pythonic/numpy/dot.hpp"
  6. #include "pythonic/numpy/conjugate.hpp"
  7. #include "pythonic/numpy/asarray.hpp"
  8. #include "pythonic/types/ndarray.hpp"
  9. #if defined(PYTHRAN_BLAS_ATLAS) || defined(PYTHRAN_BLAS_SATLAS)
  10. extern "C" {
  11. #endif
  12. #include <cblas.h>
  13. #if defined(PYTHRAN_BLAS_ATLAS) || defined(PYTHRAN_BLAS_SATLAS)
  14. }
  15. #endif
  16.  
  17. PYTHONIC_NS_BEGIN
  18.  
  19. namespace scipy
  20. {
  21. namespace signal
  22. {
  23.  
  24. // n,m input (and output) dim
  25. // r,q kernel dim
  26. void convol(double *im, double *kernel, double *out, unsigned n, unsigned m,
  27. unsigned r, unsigned q) __attribute__((noinline))
  28. {
  29. for (unsigned k = 0; k < r; ++k) // loop over kernel cols
  30. for (unsigned l = 0; l < q; ++l) // loop over kernel rows
  31. for (unsigned i = r / 2; i < n - r / 2; ++i) // loop over in cols
  32. for (unsigned j = q / 2; j < m - q / 2; ++j) // loop over in rows
  33. out[i * m + j] +=
  34. im[(i + k - r / 2) * m + (j + l - q / 2)] * kernel[k * q + l];
  35. }
  36.  
  37. void convoldot(double *im, double *kernel, double *out, unsigned n, unsigned m,
  38. unsigned r, unsigned q) __attribute__((noinline))
  39. {
  40. for (unsigned k = 0; k < r; ++k) // loop over kernel cols
  41. for (unsigned i = r / 2; i < n - r / 2; ++i) // loop over in cols
  42. for (unsigned j = q / 2; j < m - q / 2; ++j) // loop over in rows
  43. out[i * m + j] += cblas_ddot(q,im+(i + k - r / 2) * m + j - q / 2,1,kernel+k*q,1);
  44. }
  45.  
  46.  
  47.  
  48. #define min(A, B) ((A < B) ? (A) : (B))
  49. #define max(A, B) ((A > B) ? (A) : (B))
  50. void convol0(double *im, double *kernel, double *out, int n, int m, int r,
  51. int q) __attribute__((noinline))
  52. {
  53. for (int i = 0; i < n; ++i) // loop over in cols
  54. for (int j = 0; j < m; ++j) // loop over in rows
  55. {
  56. if (j >= q / 2 && i >= r / 2 && i < n - r / 2 && j<m - q / 2)
  57. j = m - q / 2;
  58.  
  59. //std::cout << "i " << i << " J " << j <<" ";
  60. //std::cout << "k " << max(0, r / 2 - i) << " -- " << min(r, n - i +
  61. //r / 2) <<" ";
  62. //std::cout << "l " << max(0, q / 2 - j) << " -- " << min(q, m - j +
  63. //q / 2) <<"\n";
  64.  
  65. for (int k = max(0, r / 2 - i); k < min(r, n - i + r / 2);
  66. ++k) // loop over kernel cols
  67. for (int l = max(0, q / 2 - j); l < min(q, m - j + q / 2);
  68. ++l) // loop over kernel rows
  69. out[i * m + j] +=
  70. im[(i + k - r / 2) * m + (j + l - q / 2)] * kernel[k * q + l];
  71. }
  72. }
  73.  
  74. template <class A, class B>
  75. types::ndarray<typename A::dtype, types::pshape<long, long>>
  76. convolve2d(A const &inA, B const &inB)
  77. // out_inc is used to indicate the inputs were swapped, which means that the
  78. // output must be time reversed and conjugated
  79. {
  80. auto shapeA = sutils::array(inA.shape());
  81. auto shapeB = sutils::array(inB.shape());
  82.  
  83. long NA = shapeA[0];
  84. long NB = shapeB[0];
  85. types::ndarray<typename A::dtype, types::pshape<long, long>> out = {
  86. shapeA, (typename A::dtype)(0)};
  87.  
  88. auto inA_ = numpy::functor::asarray{}(inA);
  89. auto inB_ = numpy::functor::asarray{}(inB);
  90. auto a = out.buffer;
  91. auto b = inA_.buffer;
  92. auto c = inB_.buffer;
  93.  
  94. std::cout << "CONV0\n";
  95. convol0(inA_.buffer, inB_.buffer, out.buffer, shapeA[0], shapeA[1],
  96. shapeB[0], shapeB[1]);
  97. std::cout << "CONV1\n";
  98. if(1)
  99. convol(inA_.buffer, inB_.buffer, out.buffer, shapeA[0], shapeA[1],
  100. shapeB[0], shapeB[1]);
  101. else
  102. convoldot(inA_.buffer, inB_.buffer, out.buffer, shapeA[0], shapeA[1],
  103. shapeB[0], shapeB[1]);
  104. std::cout << "DONE\n";
  105.  
  106. return out;
  107. }
  108.  
  109. NUMPY_EXPR_TO_NDARRAY0_IMPL(convolve2d)
  110. }
  111. }
  112. PYTHONIC_NS_END
  113.  
  114. #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement