Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <arm_neon.h>
- #include "rect_sad.h"
- #include <assert.h>
- typedef unsigned char uint8_t;
- typedef unsigned int uint32_t;
- #ifndef ABS
- #define ABS(a) (((a)>=0)?(a):(-(a)))
- #endif
/**
 * Sum of absolute differences (SAD) between two rectangles of 8-bit samples.
 *
 * Both rectangles are rect_w_r2 samples wide and rect_h_r3 rows tall, and
 * both are walked with the same row pitch stride_r4 (in bytes).
 *
 * @param src_line_r0   first sample of the first row of the source rectangle
 * @param targ_line_r1  first sample of the first row of the target rectangle
 * @param rect_w_r2     rectangle width in samples; any value, not only
 *                      multiples of 8
 * @param rect_h_r3     rectangle height in rows
 * @param stride_r4     distance in bytes between the starts of consecutive
 *                      rows, used for both buffers
 * @return the sum of |src - targ| over every sample of the rectangle,
 *         accumulated in 32 bits (wraps modulo 2^32); 0 when the width or
 *         the height is 0.
 *
 * Only the rect_w_r2 samples of each row are read; nothing past the right
 * edge of the rectangle is touched.
 */
uint32_t rect_sad(
    const uint8_t * src_line_r0,
    const uint8_t * targ_line_r1,
    unsigned rect_w_r2, unsigned rect_h_r3,
    unsigned stride_r4)
{
    /* Accumulates the columns that are not covered by a full 8-byte vector. */
    uint32_t tail_sad = 0;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* Part of each row that can be consumed in whole 8-byte vectors. */
    const unsigned vec_w = rect_w_r2 & ~7u;
    /*
     * A 16-bit lane gains at most 255 per vector, so it can absorb
     * 256 vectors (256 * 255 = 65280 <= 65535) before it has to be
     * drained into the 32-bit accumulator.
     */
    const unsigned max_chunks = 256u;
    uint32x4_t neon_sad_r5 = vdupq_n_u32(0);
#endif

    for (unsigned rows_left = rect_h_r3; rows_left > 0; --rows_left) {
        unsigned x = 0;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
        while (x < vec_w) {
            unsigned chunks = (vec_w - x) / 8u;
            if (chunks > max_chunks) {
                chunks = max_chunks;
            }

            uint16x8_t line_sad16 = vdupq_n_u16(0);
            for (; chunks > 0; --chunks, x += 8u) {
                uint8x8_t s = vld1_u8(src_line_r0 + x);
                uint8x8_t t = vld1_u8(targ_line_r1 + x);
                line_sad16 = vabal_u8(line_sad16, s, t);
            }
            /* Widen and fold the 16-bit partial sums before they can wrap. */
            neon_sad_r5 = vpadalq_u16(neon_sad_r5, line_sad16);
        }
#endif

        /* Scalar tail: the last rect_w_r2 % 8 columns (or the whole row
         * when NEON is not available). */
        for (; x < rect_w_r2; ++x) {
            int diff = (int)src_line_r0[x] - (int)targ_line_r1[x];
            tail_sad += (uint32_t)(diff < 0 ? -diff : diff);
        }

        /* Step to the next row only if there is one, so the pointers never
         * move beyond the last row that is actually read. */
        if (rows_left > 1) {
            src_line_r0 += stride_r4;
            targ_line_r1 += stride_r4;
        }
    }

    uint32_t ret = tail_sad;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* Horizontal reduction of the four 32-bit lanes. */
    uint32_t neon_sad_store[4];
    vst1q_u32(neon_sad_store, neon_sad_r5);
    for (int i = 0; i < 4; ++i) {
        ret += neon_sad_store[i];
    }
#endif

    return ret;
}
Advertisement
Add Comment
Please, Sign In to add comment