runnig

SAD, sum of absolute differences in C++ (NEON intrinsics)

Nov 7th, 2012
208
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.11 KB | None | 0 0
  1. #include <arm_neon.h>
  2. #include "rect_sad.h"
  3. #include <assert.h>
  4.  
  5. typedef unsigned char uint8_t;
  6. typedef unsigned int uint32_t;
  7.  
  8. #ifndef ABS
  9. #define ABS(a) (((a)>=0)?(a):(-(a)))
  10. #endif
  11.  
  12. uint32_t rect_sad(
  13.         const uint8_t * src_line_r0,
  14.         const uint8_t * targ_line_r1,
  15.         unsigned rect_w_r2, unsigned rect_h_r3,
  16.         unsigned stride_r4)
  17. {
  18.     uint32x4_t neon_sad_r5 = vdupq_n_u32(0);
  19.     unsigned gap_r6 = stride_r4 - rect_w_r2;
  20.     const uint8_t * end_r7 = src_line_r0 + rect_h_r3 * stride_r4;
  21.        
  22.     for (; src_line_r0 < end_r7;
  23.             src_line_r0 += gap_r6, targ_line_r1 += gap_r6)
  24.     {
  25.         uint16x8_t line_sad16 = vdupq_n_u16(0);
  26.  
  27.         const uint8_t * line_end_r8 = &src_line_r0[rect_w_r2];
  28.         for (; src_line_r0 < line_end_r8; src_line_r0+=8, targ_line_r1+=8)
  29.         {
  30.             uint8x8_t s = vld1_u8(src_line_r0);
  31.             uint8x8_t t = vld1_u8(targ_line_r1);
  32.             line_sad16 = vabal_u8(line_sad16, s, t);
  33.         }
  34.         neon_sad_r5 = vpadalq_u16(neon_sad_r5, line_sad16);
  35.     }
  36.     uint32_t neon_sad_store[4];
  37.     uint32_t ret = 0;
  38.     vst1q_u32(neon_sad_store, neon_sad_r5);
  39.     for(int i = 0; i < 4; ++i) { ret += neon_sad_store[i];}
  40.  
  41.     return ret;
  42. }
Advertisement
Add Comment
Please, Sign In to add comment