Guest User

Untitled

a guest
Sep 22nd, 2018
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.09 KB | None | 0 0
  /*
   * Rounding term for the MMX weighted-prediction kernel: four packed 16-bit
   * lanes, each holding 0x0020 (32, i.e. 1 << 5), added before the >> 6.
   *
   * The inline assembly refers to this object by the literal assembler symbol
   * "_weighted_rnd".  The C-level identifier is spelled without the underscore
   * on Cygwin/MinGW and with it everywhere else -- presumably because those
   * toolchains prepend an underscore to C symbols themselves (confirm against
   * the target ABI).  __attribute__((used)) keeps the object in the output
   * even though no C code references it.
   */
  extern "C" const unsigned long long
  #if defined(__CYGWIN__) || defined(__MINGW32__)
      weighted_rnd[]
  #else
      _weighted_rnd[]
  #endif
      __attribute__((used)) = { 0x0020002000200020ULL };
  10. #endif
  11.  
  12. static inline void add_bidir_weighted_type2_calc(const int16_t weight[], const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int width, int height, int stride)
  13. {
  14. #if defined(__GNUC__) && defined(__i386__)
  15. asm volatile ("\n\t"
  16. "push %%ebp\n\t"
  17. "push %%esi\n\t"
  18. "push %%ebx\n\t"
  19. "push %%edi\n\t"
  20. "movl %0, %%esi\n\t"
  21. "movsxw (%%esi), %%eax\n\t"
  22. "movsxw 2(%%esi), %%ecx\n\t"
  23. "movd %%eax, %%mm1\n\t"
  24. "movd %%ecx, %%mm2\n\t"
  25. "movl %1, %%eax\n\t"
  26. "movl %2, %%ebx\n\t"
  27. "movl %3, %%ecx\n\t"
  28. "movl %4, %%edx\n\t"
  29. "movl %5, %%esi\n\t"
  30. "movl %6, %%edi\n\t"
  31. "movq %%mm1, %%mm3\n\t"
  32. "movq %%mm2, %%mm4\n\t"
  33. "punpcklwd %%mm3, %%mm1\n\t"
  34. "punpcklwd %%mm4, %%mm2\n\t"
  35. "movq %%mm1, %%mm3\n\t"
  36. "movq %%mm2, %%mm4\n\t"
  37. "punpckldq %%mm3, %%mm1\n\t"
  38. "punpckldq %%mm4, %%mm2\n\t"
  39. "movq _weighted_rnd, %%mm5\n\t"
  40. "pxor %%mm0, %%mm0\n\t"
  41. "cmp $4, %%edx\n\t"
  42. "jne 1f\n\t"
  43. "0:\n\t"
  44. "movd (%%eax), %%mm3\n\t"
  45. "movd (%%ebx), %%mm4\n\t"
  46. "add $4, %%eax\n\t"
  47. "add $4, %%ebx\n\t"
  48. "punpcklbw %%mm0, %%mm3\n\t"
  49. "punpcklbw %%mm0, %%mm4\n\t"
  50. "pmullw %%mm1, %%mm3\n\t"
  51. "pmullw %%mm2, %%mm4\n\t"
  52. "paddsw %%mm5, %%mm3\n\t"
  53. "paddsw %%mm4, %%mm3\n\t"
  54. "psraw $6, %%mm3\n\t"
  55. "packuswb %%mm0, %%mm3\n\t"
  56. "movd %%mm3, (%%ecx)\n\t"
  57. "add %%edi, %%ecx\n\t"
  58. "add $-1, %%esi\n\t"
  59. "jnz 0b\n\t"
  60. "jmp 5f\n\t"
  61. "1:\n\t"
  62. "cmp $8, %%edx\n\t"
  63. "jne 3f\n\t"
  64. "2:\n\t"
  65. "movq (%%eax), %%mm3\n\t"
  66. "movq (%%ebx), %%mm4\n\t"
  67. "add $8, %%eax\n\t"
  68. "add $8, %%ebx\n\t"
  69. "movq %%mm3, %%mm6\n\t"
  70. "movq %%mm4, %%mm7\n\t"
  71. "punpcklbw %%mm0, %%mm3\n\t"
  72. "punpcklbw %%mm0, %%mm4\n\t"
  73. "punpckhbw %%mm0, %%mm6\n\t"
  74. "punpckhbw %%mm0, %%mm7\n\t"
  75. "pmullw %%mm1, %%mm3\n\t"
  76. "pmullw %%mm2, %%mm4\n\t"
  77. "pmullw %%mm1, %%mm6\n\t"
  78. "pmullw %%mm2, %%mm7\n\t"
  79. "paddsw %%mm5, %%mm3\n\t"
  80. "paddsw %%mm5, %%mm6\n\t"
  81. "paddsw %%mm4, %%mm3\n\t"
  82. "paddsw %%mm7, %%mm6\n\t"
  83. "psraw $6, %%mm3\n\t"
  84. "psraw $6, %%mm6\n\t"
  85. "packuswb %%mm6, %%mm3\n\t"
  86. "movq %%mm3, (%%ecx)\n\t"
  87. "add %%edi, %%ecx\n\t"
  88. "add $-1, %%esi\n\t"
  89. "jnz 2b\n\t"
  90. "jmp 5f\n\t"
  91. "3:\n\t"
  92. "movq (%%eax), %%mm3\n\t"
  93. "movq (%%ebx), %%mm4\n\t"
  94. "movq %%mm3, %%mm6\n\t"
  95. "movq %%mm4, %%mm7\n\t"
  96. "punpcklbw %%mm0, %%mm3\n\t"
  97. "punpcklbw %%mm0, %%mm4\n\t"
  98. "punpckhbw %%mm0, %%mm6\n\t"
  99. "punpckhbw %%mm0, %%mm7\n\t"
  100. "pmullw %%mm1, %%mm3\n\t"
  101. "pmullw %%mm2, %%mm4\n\t"
  102. "pmullw %%mm1, %%mm6\n\t"
  103. "pmullw %%mm2, %%mm7\n\t"
  104. "paddsw %%mm5, %%mm3\n\t"
  105. "paddsw %%mm5, %%mm6\n\t"
  106. "paddsw %%mm4, %%mm3\n\t"
  107. "paddsw %%mm7, %%mm6\n\t"
  108. "psraw $6, %%mm3\n\t"
  109. "psraw $6, %%mm6\n\t"
  110. "packuswb %%mm6, %%mm3\n\t"
  111. "movq %%mm3, (%%ecx)\n\t"
  112.  
  113. "movq 8(%%eax), %%mm3\n\t"
  114. "movq 8(%%ebx), %%mm4\n\t"
  115. "add $16, %%eax\n\t"
  116. "add $16, %%ebx\n\t"
  117. "movq %%mm3, %%mm6\n\t"
  118. "movq %%mm4, %%mm7\n\t"
  119. "punpcklbw %%mm0, %%mm3\n\t"
  120. "punpcklbw %%mm0, %%mm4\n\t"
  121. "punpckhbw %%mm0, %%mm6\n\t"
  122. "punpckhbw %%mm0, %%mm7\n\t"
  123. "pmullw %%mm1, %%mm3\n\t"
  124. "pmullw %%mm2, %%mm4\n\t"
  125. "pmullw %%mm1, %%mm6\n\t"
  126. "pmullw %%mm2, %%mm7\n\t"
  127. "paddsw %%mm5, %%mm3\n\t"
  128. "paddsw %%mm5, %%mm6\n\t"
  129. "paddsw %%mm4, %%mm3\n\t"
  130. "paddsw %%mm7, %%mm6\n\t"
  131. "psraw $6, %%mm3\n\t"
  132. "psraw $6, %%mm6\n\t"
  133. "packuswb %%mm6, %%mm3\n\t"
  134. "movq %%mm3, 8(%%ecx)\n\t"
  135.  
  136. "add %%edi, %%ecx\n\t"
  137. "add $-1, %%esi\n\t"
  138. "jnz 3b\n\t"
  139. "5:\n\t"
  140. "pop %%edi\n\t"
  141. "pop %%ebx\n\t"
  142. "pop %%esi\n\t"
  143. "pop %%ebp\n\t"
  144. "emms"
  145. :
  146. : "m"(weight), "m"(src0), "m"(src1), "m"(dst), "m"(width), "m"(height), "m"(stride));
  147. #else
  148. int w0 = *weight++;
  149. int w1 = *weight;
  150. stride -= width;
  151. width = (unsigned)width >> 2;
  152. do {
  153. int x = width;
  154. do {
  155. dst[0] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
  156. dst[1] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
  157. dst[2] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
  158. dst[3] = CLIP255C((*src1++ * w1 + *src0++ * w0 + (1 << 5)) >> 6);
  159. dst += 4;
  160. } while (--x);
  161. dst += stride;
  162. } while (--height);
  163. #endif
  164. }
Add Comment
Please, Sign In to add comment