Advertisement
Guest User

Untitled

a guest
May 22nd, 2019
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.64 KB | None | 0 0
  1. #include "is.h"
  2. #include "math.h"
  3. #include <vector>
  4. #include <stdio.h>
  5. #include <iostream>
  6. #include "vector.h"
  7. #include <tuple>
  8. #include <algorithm>
  9. #include "stopwatch.h"
  10. #include <immintrin.h>
  11.  
  12. inline int static divup(int a, int b) {
  13. return (a + b - 1)/b;
  14. }
  15. inline int static roundup(int a, int b) {
  16. return divup(a, b) * b;
  17. }
  18. inline float hmax8(float8_t vv) {
  19. float v = 0;
  20. for (int i = 0; i < 8; ++i) {
  21. v = std::max(vv[i], v);
  22. }
  23. return v;
  24. }
  25.  
  26. Result segment(int ny, int nx, const float* data) {
  27. // ppc::stopwatch sw;
  28. // sw.record();
  29. int vs = 8;
  30. int nxp = roundup(nx + vs, vs);
  31. int nyp = ny + 1;
  32. std::vector<float> s(nxp*nyp);
  33.  
  34.  
  35. for (int y = 0; y < nyp; y++) {
  36. for (int x = 0; x < nxp; x++){
  37. float elem = NAN;
  38. if (x < nx + 1 && y < ny +1) {
  39. if (x == 0 || y == 0) {
  40. elem = 0;
  41. } else {
  42. float add = data[(x-1)*3 + (y-1)*3*nx];
  43. elem = s[(x-1) + y*nxp] + s[x + (y-1)*nxp] - s[(x-1) + (y-1)*nxp] + add;
  44. }
  45. }
  46. s[x + y*nxp] = elem;
  47. }
  48. }
  49.  
  50. // sw.record();
  51.  
  52. // std::cout<<"\ndata:\n";
  53. // for (int y = 0; y < ny; y++) {
  54. // for (int x = 0; x < nx; x++) {
  55. // std::cout<<data[x*3 + y*3*nx] << " ";
  56. // }
  57. // std::cout<<"\n";
  58. // }
  59.  
  60. // std::cout<<"\nprocessed:\n";
  61. // for (int y = 0; y < nyp; y++) {
  62. // for (int x = 0; x < nxp; x++) {
  63. // std::cout<<s[x + y*nxp] << " ";
  64. // }
  65. // std::cout<<"\n";
  66. // }
  67.  
  68. float8_t hval = float8_0;
  69. int height = 0;
  70. int width = 0;
  71.  
  72. float vp = s[nx + nxp*ny];
  73. float vp2 = vp * vp;
  74.  
  75. #pragma omp parallel
  76. {
  77. float8_t tprev = float8_0;
  78. float8_t th = float8_0;
  79. int bh = 0;
  80. int bw = 0;
  81. #pragma omp for nowait schedule(dynamic, 1)
  82. for (int h = 1; h < ny + 1; h++) {
  83. for (int w = 1; w < nx + 1; w++) {
  84. int ax = w * h;
  85. int ay = ny*nx - ax;
  86.  
  87. if (ax != ny*nx) {
  88.  
  89. float iax = 1.0/ax;
  90. float iay = 1.0/ay;
  91. //some equation solving happened here :D
  92. float a = vp2 * iay;
  93. float b = -2*vp*iay;
  94. float c = (ay + ax)*iax*iay;
  95.  
  96. int yub = ny - h + 2;
  97. int xub = nx - w + 2;
  98. for (int y0 = 1; y0 < yub; y0++) {
  99. for (int x0 = 1; x0 < xub; x0+=vs) {
  100. int y1 = y0 - 1 + h;
  101. int x1 = x0 - 1 + w;
  102.  
  103. float8_t x1y1 = _mm256_loadu_ps(s.data() + x1 + y1*nxp);
  104. float8_t x1y0 = _mm256_loadu_ps(s.data() + x1 + (y0-1)*nxp);
  105. float8_t x0y1 = _mm256_loadu_ps(s.data() + (x0-1) + y1*nxp);
  106. float8_t x0y0 = _mm256_loadu_ps(s.data() + (x0-1) + (y0-1)*nxp);
  107.  
  108. float8_t vx = x1y1 - x0y1 - x1y0 + x0y0;
  109. float8_t hv = a + vx*(b + c*vx);
  110.  
  111. th = (hv > th) ? hv : th;
  112. }
  113. }
  114.  
  115. float8_t d = _mm256_cmp_ps(th, tprev, _CMP_GT_OQ);
  116. if (_mm256_movemask_ps(d)) {
  117. float hmax = hmax8(th); //horizontal max
  118. tprev = _mm256_set1_ps(hmax); //vector of hmax
  119. bh = h; //best height to current
  120. bw = w; //best width to current
  121. }
  122. }
  123. }
  124. }
  125. #pragma omp critical
  126. {
  127. float8_t d = _mm256_cmp_ps(tprev, hval, _CMP_GT_OQ);
  128. if (_mm256_movemask_ps(d)) {
  129. hval = tprev;
  130. height = bh;
  131. width = bw;
  132. }
  133. }
  134. }
  135.  
  136. // sw.record();
  137.  
  138. Result best;
  139. float xval = 0;
  140. int ax = width * height;
  141. int ay = ny*nx - ax;
  142. float iax = 1.0/ax;
  143. float iay = 1.0/ay;
  144. int yub = ny - height + 2;
  145. int xub = nx - width + 2;
  146. for (int y0 = 1; y0 < yub; y0++) {
  147. for (int x0 = 1; x0 < xub; x0++) {
  148.  
  149. int y1 = y0 - 1 + height;
  150. int x1 = x0 - 1 + width;
  151.  
  152. float x1y1 = s[x1 + y1*nxp];
  153. float x1y0 = s[x1 + (y0-1)*nxp];
  154. float x0y1 = s[(x0-1) + y1*nxp];
  155. float x0y0 = s[(x0-1) + (y0-1)*nxp];
  156.  
  157.  
  158. float vx = x1y1 - x0y1 - x1y0 + x0y0;
  159. float vy = vp - vx;
  160. float xc = vx * iax;
  161. float yc = vy * iay;
  162.  
  163. vx = xc * vx;
  164. vy = yc * vy;
  165.  
  166. float h = vx + vy;
  167.  
  168. // std::cout<<"indices: (y1, x1, y0, x0): " << y1 << ", "<< x1 << ", "<<y0 << ", "<< x0 << "\n ";
  169. // std::cout<<"h:" <<h;
  170. if (h > xval) {
  171. xval = h;
  172. best.y0 = y0 - 1;
  173. best.x0 = x0 - 1;
  174. best.y1 = y1;
  175. best.x1 = x1;
  176. best.outer[0] = yc;
  177. best.outer[1] = yc;
  178. best.outer[2] = yc;
  179. best.inner[0] = xc;
  180. best.inner[1] = xc;
  181. best.inner[2] = xc;
  182. }
  183. }
  184. }
  185.  
  186. // sw.record();
  187. // sw.print();
  188.  
  189. return best;
  190. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement