Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "is.h"
- #include "math.h"
- #include <vector>
- #include <stdio.h>
- #include <iostream>
- #include "vector.h"
- #include <tuple>
- #include <algorithm>
- #include "stopwatch.h"
- #include <immintrin.h>
- inline int static divup(int a, int b) {
- return (a + b - 1)/b;
- }
- inline int static roundup(int a, int b) {
- return divup(a, b) * b;
- }
- inline float hmax8(float8_t vv) {
- float v = 0;
- for (int i = 0; i < 8; ++i) {
- v = std::max(vv[i], v);
- }
- return v;
- }
- Result segment(int ny, int nx, const float* data) {
- // ppc::stopwatch sw;
- // sw.record();
- int vs = 8;
- int nxp = roundup(nx + vs, vs);
- int nyp = ny + 1;
- std::vector<float> s(nxp*nyp);
- for (int y = 0; y < nyp; y++) {
- for (int x = 0; x < nxp; x++){
- float elem = NAN;
- if (x < nx + 1 && y < ny +1) {
- if (x == 0 || y == 0) {
- elem = 0;
- } else {
- float add = data[(x-1)*3 + (y-1)*3*nx];
- elem = s[(x-1) + y*nxp] + s[x + (y-1)*nxp] - s[(x-1) + (y-1)*nxp] + add;
- }
- }
- s[x + y*nxp] = elem;
- }
- }
- // sw.record();
- // std::cout<<"\ndata:\n";
- // for (int y = 0; y < ny; y++) {
- // for (int x = 0; x < nx; x++) {
- // std::cout<<data[x*3 + y*3*nx] << " ";
- // }
- // std::cout<<"\n";
- // }
- // std::cout<<"\nprocessed:\n";
- // for (int y = 0; y < nyp; y++) {
- // for (int x = 0; x < nxp; x++) {
- // std::cout<<s[x + y*nxp] << " ";
- // }
- // std::cout<<"\n";
- // }
- float8_t hval = float8_0;
- int height = 0;
- int width = 0;
- float vp = s[nx + nxp*ny];
- float vp2 = vp * vp;
- #pragma omp parallel
- {
- float8_t tprev = float8_0;
- float8_t th = float8_0;
- int bh = 0;
- int bw = 0;
- #pragma omp for nowait schedule(dynamic, 1)
- for (int h = 1; h < ny + 1; h++) {
- for (int w = 1; w < nx + 1; w++) {
- int ax = w * h;
- int ay = ny*nx - ax;
- if (ax != ny*nx) {
- float iax = 1.0/ax;
- float iay = 1.0/ay;
- //some equation solving happened here :D
- float a = vp2 * iay;
- float b = -2*vp*iay;
- float c = (ay + ax)*iax*iay;
- int yub = ny - h + 2;
- int xub = nx - w + 2;
- for (int y0 = 1; y0 < yub; y0++) {
- for (int x0 = 1; x0 < xub; x0+=vs) {
- int y1 = y0 - 1 + h;
- int x1 = x0 - 1 + w;
- float8_t x1y1 = _mm256_loadu_ps(s.data() + x1 + y1*nxp);
- float8_t x1y0 = _mm256_loadu_ps(s.data() + x1 + (y0-1)*nxp);
- float8_t x0y1 = _mm256_loadu_ps(s.data() + (x0-1) + y1*nxp);
- float8_t x0y0 = _mm256_loadu_ps(s.data() + (x0-1) + (y0-1)*nxp);
- float8_t vx = x1y1 - x0y1 - x1y0 + x0y0;
- float8_t hv = a + vx*(b + c*vx);
- th = (hv > th) ? hv : th;
- }
- }
- float8_t d = _mm256_cmp_ps(th, tprev, _CMP_GT_OQ);
- if (_mm256_movemask_ps(d)) {
- float hmax = hmax8(th); //horizontal max
- tprev = _mm256_set1_ps(hmax); //vector of hmax
- bh = h; //best height to current
- bw = w; //best width to current
- }
- }
- }
- }
- #pragma omp critical
- {
- float8_t d = _mm256_cmp_ps(tprev, hval, _CMP_GT_OQ);
- if (_mm256_movemask_ps(d)) {
- hval = tprev;
- height = bh;
- width = bw;
- }
- }
- }
- // sw.record();
- Result best;
- float xval = 0;
- int ax = width * height;
- int ay = ny*nx - ax;
- float iax = 1.0/ax;
- float iay = 1.0/ay;
- int yub = ny - height + 2;
- int xub = nx - width + 2;
- for (int y0 = 1; y0 < yub; y0++) {
- for (int x0 = 1; x0 < xub; x0++) {
- int y1 = y0 - 1 + height;
- int x1 = x0 - 1 + width;
- float x1y1 = s[x1 + y1*nxp];
- float x1y0 = s[x1 + (y0-1)*nxp];
- float x0y1 = s[(x0-1) + y1*nxp];
- float x0y0 = s[(x0-1) + (y0-1)*nxp];
- float vx = x1y1 - x0y1 - x1y0 + x0y0;
- float vy = vp - vx;
- float xc = vx * iax;
- float yc = vy * iay;
- vx = xc * vx;
- vy = yc * vy;
- float h = vx + vy;
- // std::cout<<"indices: (y1, x1, y0, x0): " << y1 << ", "<< x1 << ", "<<y0 << ", "<< x0 << "\n ";
- // std::cout<<"h:" <<h;
- if (h > xval) {
- xval = h;
- best.y0 = y0 - 1;
- best.x0 = x0 - 1;
- best.y1 = y1;
- best.x1 = x1;
- best.outer[0] = yc;
- best.outer[1] = yc;
- best.outer[2] = yc;
- best.inner[0] = xc;
- best.inner[1] = xc;
- best.inner[2] = xc;
- }
- }
- }
- // sw.record();
- // sw.print();
- return best;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement