Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
-  * Rounding constant for the MMX code in add_bidir_weighted_type2_calc():
-  * four packed 16-bit words, each 0x0020 (32, i.e. 1 << 5, the same rounding
-  * term the C fallback adds before its >> 6).
-  *
-  * The inline asm refers to this object by the literal assembler symbol
-  * `_weighted_rnd`, so the C-level spelling differs per toolchain.
-  * NOTE(review): presumably Cygwin/MinGW prepend an underscore to C symbols,
-  * which would make both spellings resolve to `_weighted_rnd` -- confirm.
-  *
-  * __attribute__((used)) keeps the object in the output even though the only
-  * visible reference to it is inside an asm string.
-  */ extern "C" const unsigned long long
- #if !defined(__CYGWIN__) && !defined(__MINGW32__)
- _weighted_rnd[]
- #else
- weighted_rnd[]
- #endif
- __attribute__((used)) = {
- 0x0020002000200020ULL
- };
- #endif
- /*
-  * Weighted blend of two 8-bit pixel blocks into dst. Per pixel the C path
-  * computes
-  *
-  *     dst[x] = CLIP255C((src1[x] * weight[1] + src0[x] * weight[0] + 32) >> 6)
-  *
-  * and the i386 MMX path performs the same multiply/add/shift in 16-bit lanes,
-  * finishing with packuswb (unsigned saturation to a byte).
-  * NOTE(review): CLIP255C is defined elsewhere; presumably it clamps to 0..255.
-  *
-  * weight  two signed 16-bit weights; weight[0] scales src0, weight[1] scales src1.
-  * src0    first source; read sequentially, with no padding between rows.
-  * src1    second source; read the same way as src0.
-  * dst     destination; consecutive rows start `stride` bytes apart.
-  * width   pixels per row. The MMX path has dedicated loops for 4 and 8 and
-  *         treats every other value as 16; the C path processes width / 4
-  *         groups of four pixels per row.
-  * height  number of rows.
-  * stride  distance in bytes between destination rows.
-  *
-  * All loops test their counter only after running the body once, so height
-  * must be at least 1 (and width at least 4 in the C path); smaller values are
-  * not handled.
-  *
-  * The MMX path uses pmullw/paddsw (16-bit products, saturating adds) while the
-  * C path uses int arithmetic, so results may differ when a weighted sum does
-  * not fit in a signed 16-bit word.
-  */
- static inline void add_bidir_weighted_type2_calc(const int16_t weight[], const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int width, int height, int stride)
- {
- #if defined(__GNUC__) && defined(__i386__)
-     /*
-      * NOTE(review): the "m" operands are read after four pushes have moved
-      * %esp. If the compiler addresses them relative to %esp (for example with
-      * the frame pointer omitted) those offsets would be stale -- confirm the
-      * build settings or load the operands before pushing.
-      * NOTE(review): %ebp is saved and restored but never written in between.
-      */
-     asm volatile ("\n\t"
-     "push %%ebp\n\t"
-     "push %%esi\n\t"
-     "push %%ebx\n\t"
-     "push %%edi\n\t"
-     "movl %0, %%esi\n\t"                 /* esi = weight */
-     "movsxw (%%esi), %%eax\n\t"          /* eax = weight[0], sign-extended */
-     "movsxw 2(%%esi), %%ecx\n\t"         /* ecx = weight[1], sign-extended */
-     "movd %%eax, %%mm1\n\t"
-     "movd %%ecx, %%mm2\n\t"
-     "movl %1, %%eax\n\t"                 /* eax = src0 */
-     "movl %2, %%ebx\n\t"                 /* ebx = src1 */
-     "movl %3, %%ecx\n\t"                 /* ecx = dst */
-     "movl %4, %%edx\n\t"                 /* edx = width */
-     "movl %5, %%esi\n\t"                 /* esi = height (row counter) */
-     "movl %6, %%edi\n\t"                 /* edi = stride */
-     /* Broadcast each weight into all four 16-bit lanes of mm1 / mm2. */
-     "movq %%mm1, %%mm3\n\t"
-     "movq %%mm2, %%mm4\n\t"
-     "punpcklwd %%mm3, %%mm1\n\t"
-     "punpcklwd %%mm4, %%mm2\n\t"
-     "movq %%mm1, %%mm3\n\t"
-     "movq %%mm2, %%mm4\n\t"
-     "punpckldq %%mm3, %%mm1\n\t"
-     "punpckldq %%mm4, %%mm2\n\t"
-     "movq _weighted_rnd, %%mm5\n\t"      /* mm5 = rounding term, 32 per lane */
-     "pxor %%mm0, %%mm0\n\t"              /* mm0 = 0, used to widen bytes to words */
-     "cmp $4, %%edx\n\t"
-     "jne 1f\n\t"
-     /* Loop 0: width == 4, four pixels per row. */
-     "0:\n\t"
-     "movd (%%eax), %%mm3\n\t"
-     "movd (%%ebx), %%mm4\n\t"
-     "add $4, %%eax\n\t"
-     "add $4, %%ebx\n\t"
-     "punpcklbw %%mm0, %%mm3\n\t"
-     "punpcklbw %%mm0, %%mm4\n\t"
-     "pmullw %%mm1, %%mm3\n\t"            /* src0 * weight[0] */
-     "pmullw %%mm2, %%mm4\n\t"            /* src1 * weight[1] */
-     "paddsw %%mm5, %%mm3\n\t"
-     "paddsw %%mm4, %%mm3\n\t"
-     "psraw $6, %%mm3\n\t"
-     "packuswb %%mm0, %%mm3\n\t"
-     "movd %%mm3, (%%ecx)\n\t"
-     "add %%edi, %%ecx\n\t"               /* next destination row */
-     "add $-1, %%esi\n\t"
-     "jnz 0b\n\t"
-     "jmp 5f\n\t"
-     "1:\n\t"
-     "cmp $8, %%edx\n\t"
-     "jne 3f\n\t"
-     /* Loop 2: width == 8, eight pixels per row (low and high halves). */
-     "2:\n\t"
-     "movq (%%eax), %%mm3\n\t"
-     "movq (%%ebx), %%mm4\n\t"
-     "add $8, %%eax\n\t"
-     "add $8, %%ebx\n\t"
-     "movq %%mm3, %%mm6\n\t"
-     "movq %%mm4, %%mm7\n\t"
-     "punpcklbw %%mm0, %%mm3\n\t"
-     "punpcklbw %%mm0, %%mm4\n\t"
-     "punpckhbw %%mm0, %%mm6\n\t"
-     "punpckhbw %%mm0, %%mm7\n\t"
-     "pmullw %%mm1, %%mm3\n\t"
-     "pmullw %%mm2, %%mm4\n\t"
-     "pmullw %%mm1, %%mm6\n\t"
-     "pmullw %%mm2, %%mm7\n\t"
-     "paddsw %%mm5, %%mm3\n\t"
-     "paddsw %%mm5, %%mm6\n\t"
-     "paddsw %%mm4, %%mm3\n\t"
-     "paddsw %%mm7, %%mm6\n\t"
-     "psraw $6, %%mm3\n\t"
-     "psraw $6, %%mm6\n\t"
-     "packuswb %%mm6, %%mm3\n\t"
-     "movq %%mm3, (%%ecx)\n\t"
-     "add %%edi, %%ecx\n\t"
-     "add $-1, %%esi\n\t"
-     "jnz 2b\n\t"
-     "jmp 5f\n\t"
-     /* Loop 3: any other width is handled as 16 pixels per row, in two 8-pixel halves. */
-     "3:\n\t"
-     "movq (%%eax), %%mm3\n\t"
-     "movq (%%ebx), %%mm4\n\t"
-     "movq %%mm3, %%mm6\n\t"
-     "movq %%mm4, %%mm7\n\t"
-     "punpcklbw %%mm0, %%mm3\n\t"
-     "punpcklbw %%mm0, %%mm4\n\t"
-     "punpckhbw %%mm0, %%mm6\n\t"
-     "punpckhbw %%mm0, %%mm7\n\t"
-     "pmullw %%mm1, %%mm3\n\t"
-     "pmullw %%mm2, %%mm4\n\t"
-     "pmullw %%mm1, %%mm6\n\t"
-     "pmullw %%mm2, %%mm7\n\t"
-     "paddsw %%mm5, %%mm3\n\t"
-     "paddsw %%mm5, %%mm6\n\t"
-     "paddsw %%mm4, %%mm3\n\t"
-     "paddsw %%mm7, %%mm6\n\t"
-     "psraw $6, %%mm3\n\t"
-     "psraw $6, %%mm6\n\t"
-     "packuswb %%mm6, %%mm3\n\t"
-     "movq %%mm3, (%%ecx)\n\t"            /* first 8 output pixels */
-     "movq 8(%%eax), %%mm3\n\t"
-     "movq 8(%%ebx), %%mm4\n\t"
-     "add $16, %%eax\n\t"
-     "add $16, %%ebx\n\t"
-     "movq %%mm3, %%mm6\n\t"
-     "movq %%mm4, %%mm7\n\t"
-     "punpcklbw %%mm0, %%mm3\n\t"
-     "punpcklbw %%mm0, %%mm4\n\t"
-     "punpckhbw %%mm0, %%mm6\n\t"
-     "punpckhbw %%mm0, %%mm7\n\t"
-     "pmullw %%mm1, %%mm3\n\t"
-     "pmullw %%mm2, %%mm4\n\t"
-     "pmullw %%mm1, %%mm6\n\t"
-     "pmullw %%mm2, %%mm7\n\t"
-     "paddsw %%mm5, %%mm3\n\t"
-     "paddsw %%mm5, %%mm6\n\t"
-     "paddsw %%mm4, %%mm3\n\t"
-     "paddsw %%mm7, %%mm6\n\t"
-     "psraw $6, %%mm3\n\t"
-     "psraw $6, %%mm6\n\t"
-     "packuswb %%mm6, %%mm3\n\t"
-     "movq %%mm3, 8(%%ecx)\n\t"           /* second 8 output pixels */
-     "add %%edi, %%ecx\n\t"
-     "add $-1, %%esi\n\t"
-     "jnz 3b\n\t"
-     "5:\n\t"
-     "pop %%edi\n\t"
-     "pop %%ebx\n\t"
-     "pop %%esi\n\t"
-     "pop %%ebp\n\t"
-     "emms"
-     : /* no outputs */
-     : "m"(weight), "m"(src0), "m"(src1), "m"(dst), "m"(width), "m"(height), "m"(stride)
-     /*
-      * eax/ecx/edx are overwritten and not restored, the flags change, and the
-      * block reads and writes memory that is not listed as an operand; declare
-      * all of that so the compiler keeps no live values in those registers and
-      * does not cache memory across the statement.
-      * NOTE(review): the MMX registers and the x87 state reset by emms are
-      * still undeclared -- confirm whether callers keep floating-point values
-      * live across this call.
-      */
-     : "eax", "ecx", "edx", "cc", "memory");
- #else
-     int w0 = *weight++;
-     int w1 = *weight;
-     stride -= width;                     /* dst already advances by width inside the row loop */
-     width = (unsigned)width >> 2;        /* number of 4-pixel groups per row */
-     do {
-         int x = width;
-         do {
-             dst[0] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
-             dst[1] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
-             dst[2] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
-             dst[3] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
-             dst += 4;
-         } while (--x);
-         dst += stride;
-     } while (--height);
- #endif
- }
Add Comment
Please, Sign In to add comment