Advertisement
Guest User

Untitled

a guest
Jul 7th, 2017
542
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 23.92 KB | None | 0 0
  1. From 1b9409b5264ee17651a4467529a841b8de7545f7 Mon Sep 17 00:00:00 2001
  2. From: Daniel Kang <daniel.d.kang@gmail.com>
  3. Date: Mon, 27 Dec 2010 17:11:09 -0500
  4. Subject: [PATCH 1/5] pred8x8_top_dc_mmxext
  5.  
  6. ---
  7. libavcodec/x86/h264_intrapred.asm    |   33 +++++++++++++++++++++++++++++++++
  8.  libavcodec/x86/h264_intrapred_init.c |    4 ++++
  9.  2 files changed, 37 insertions(+), 0 deletions(-)
  10.  
  11. diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
  12. index 14a6038..1cb22f5 100644
  13. --- a/libavcodec/x86/h264_intrapred.asm
  14. +++ b/libavcodec/x86/h264_intrapred.asm
  15. @@ -828,6 +828,39 @@ PRED8x8_H mmxext
  16.  PRED8x8_H ssse3
  17.  
  18.  ;-----------------------------------------------------------------------------
  19. +; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
  20. +;-----------------------------------------------------------------------------
  21. +cglobal pred8x8_top_dc_mmxext, 2,5 ; fills the 8x8 block with DCs of the row above (one DC per 4-pixel half)
  22. +    sub         r0, r1             ; r0 = src - stride: the row above the block
  23. +    movq       mm0, [r0]           ; the 8 pixels of that row
  24. +    pxor       mm1, mm1
  25. +    pxor       mm2, mm2            ; mm2 stays zero: operand for psadbw/pavgw below
  26. +    lea         r2, [r0+r1*2]      ; r2 = src + 1*stride
  27. +    punpckhbw  mm1, mm0            ; top pixels 4..7 interleaved with zero bytes
  28. +    punpcklbw  mm0, mm2            ; top pixels 0..3 interleaved with zero bytes
  29. +    psadbw     mm1, mm2        ; s1 = sum of top pixels 4..7
  30. +    lea         r3, [r2+r1*2]      ; r3 = src + 3*stride
  31. +    psadbw     mm0, mm2        ; s0 = sum of top pixels 0..3
  32. +    psrlw      mm1, 1
  33. +    psrlw      mm0, 1
  34. +    pavgw      mm1, mm2            ; with mm2 = 0: ((s1 >> 1) + 1) >> 1
  35. +    lea         r4, [r3+r1*2]      ; NOTE: r4 is never read; the same address is recomputed into r0 below
  36. +    pavgw      mm0, mm2            ; with mm2 = 0: ((s0 >> 1) + 1) >> 1
  37. +    pshufw     mm1, mm1, 0     ; dc1 (w), broadcast to all four words
  38. +    pshufw     mm0, mm0, 0     ; dc0 (w), broadcast to all four words
  39. +    packuswb   mm0, mm1        ; dc0,dc1 (b): 4 bytes of dc0 then 4 bytes of dc1
  40. +    movq [r0+r1*1], mm0            ; row 0
  41. +    movq [r0+r1*2], mm0            ; row 1
  42. +    lea         r0, [r3+r1*2]      ; r0 = src + 5*stride
  43. +    movq [r2+r1*1], mm0            ; row 2
  44. +    movq [r2+r1*2], mm0            ; row 3
  45. +    movq [r3+r1*1], mm0            ; row 4
  46. +    movq [r3+r1*2], mm0            ; row 5
  47. +    movq [r0+r1*1], mm0            ; row 6
  48. +    movq [r0+r1*2], mm0            ; row 7
  49. +    RET
  50. +
  51. +;-----------------------------------------------------------------------------
  52.  ; void pred8x8_dc_rv40(uint8_t *src, int stride)
  53.  ;-----------------------------------------------------------------------------
  54.  
  55. diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
  56. index 10a6dd6..57a93f7 100644
  57. --- a/libavcodec/x86/h264_intrapred_init.c
  58. +++ b/libavcodec/x86/h264_intrapred_init.c
  59. @@ -44,6 +44,7 @@ void ff_pred16x16_plane_svq3_ssse3 (uint8_t *src, int stride);
  60.  void ff_pred16x16_tm_vp8_mmx       (uint8_t *src, int stride);
  61.  void ff_pred16x16_tm_vp8_mmxext    (uint8_t *src, int stride);
  62.  void ff_pred16x16_tm_vp8_sse2      (uint8_t *src, int stride);
  63. +void ff_pred8x8_top_dc_mmxext      (uint8_t *src, int stride);
  64.  void ff_pred8x8_dc_rv40_mmxext     (uint8_t *src, int stride);
  65.  void ff_pred8x8_vertical_mmx       (uint8_t *src, int stride);
  66.  void ff_pred8x8_horizontal_mmx     (uint8_t *src, int stride);
  67. @@ -97,6 +98,9 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  68.          h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
  69.          if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
  70.              h->pred4x4  [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
  71. +        if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
  72. +            h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
  73. +        }
  74.          if (codec_id == CODEC_ID_VP8) {
  75.              h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
  76.              h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmxext;
  77. --
  78. 1.7.2.2
  79.  
  80.  
  81. From fbafcfba182db3d9539a62cefb028a087e0c473e Mon Sep 17 00:00:00 2001
  82. From: Daniel Kang <daniel.d.kang@gmail.com>
  83. Date: Mon, 27 Dec 2010 17:42:14 -0500
  84. Subject: [PATCH 2/5] pred8x8_dc_mmxext
  85.  
  86. ---
  87. libavcodec/x86/h264_intrapred.asm    |   61 ++++++++++++++++++++++++++++++++++
  88.  libavcodec/x86/h264_intrapred_init.c |    2 +
  89.  2 files changed, 63 insertions(+), 0 deletions(-)
  90.  
  91. diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
  92. index 1cb22f5..61a587d 100644
  93. --- a/libavcodec/x86/h264_intrapred.asm
  94. +++ b/libavcodec/x86/h264_intrapred.asm
  95. @@ -861,6 +861,67 @@ cglobal pred8x8_top_dc_mmxext, 2,5
  96.      RET
  97.  
  98.  ;-----------------------------------------------------------------------------
  99. +; void pred8x8_dc_mmxext(uint8_t *src, int stride)
  100. +;-----------------------------------------------------------------------------
  101. +
  102. +INIT_MMX
  103. +cglobal pred8x8_dc_mmxext, 2,5 ; 8x8 DC prediction from the row above and the column to the left
  104. +    sub       r0, r1             ; r0 = src - stride: the row above the block
  105. +    pxor      m7, m7             ; m7 stays zero: operand for psadbw/pavgw below
  106. +    movd      m0, [r0+0]         ; top pixels 0..3
  107. +    movd      m1, [r0+4]         ; top pixels 4..7
  108. +    psadbw    m0, m7            ; s0 = sum of top pixels 0..3
  109. +    mov       r4, r0             ; remember src - stride; copied back to r0 after the left column is summed
  110. +    psadbw    m1, m7            ; s1 = sum of top pixels 4..7
  111. +
  112. +    movzx    r2d, byte [r0+r1*1-1] ; left neighbour of row 0
  113. +    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 1
  114. +    lea       r0, [r0+r1*2]
  115. +    add      r2d, r3d
  116. +    movzx    r3d, byte [r0+r1*1-1] ; left neighbour of row 2
  117. +    add      r2d, r3d
  118. +    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 3
  119. +    add      r2d, r3d
  120. +    lea       r0, [r0+r1*2]
  121. +    movd      m2, r2d            ; s2 = sum of left neighbours of rows 0..3
  122. +    movzx    r2d, byte [r0+r1*1-1] ; left neighbour of row 4
  123. +    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 5
  124. +    lea       r0, [r0+r1*2]
  125. +    add      r2d, r3d
  126. +    movzx    r3d, byte [r0+r1*1-1] ; left neighbour of row 6
  127. +    add      r2d, r3d
  128. +    movzx    r3d, byte [r0+r1*2-1] ; left neighbour of row 7
  129. +    add      r2d, r3d
  130. +    movd      m3, r2d            ; s3 = sum of left neighbours of rows 4..7
  131. +
  132. +    punpcklwd m0, m1             ; words: s0, s1, 0, 0
  133. +    mov       r0, r4             ; r0 = src - stride again
  134. +    punpcklwd m2, m3             ; words: s2, s3, 0, 0
  135. +    punpckldq m0, m2            ; s0, s1, s2, s3
  136. +    pshufw    m3, m0, 11110110b ; s2, s1, s3, s3
  137. +    lea       r2, [r0+r1*2]      ; r2 = src + 1*stride
  138. +    pshufw    m0, m0, 01110100b ; s0, s1, s3, s1
  139. +    paddw     m0, m3             ; s0+s2, 2*s1, 2*s3, s1+s3
  140. +    lea       r3, [r2+r1*2]      ; r3 = src + 3*stride
  141. +    psrlw     m0, 2
  142. +    pavgw     m0, m7            ; with m7 = 0: ((x >> 2) + 1) >> 1 per word -> DCs from s0+s2, s1, s3, s1+s3
  143. +    lea       r4, [r3+r1*2]      ; r4 = src + 5*stride
  144. +    packuswb  m0, m0             ; bytes: d0 d1 d2 d3 d0 d1 d2 d3
  145. +    punpcklbw m0, m0             ; bytes: d0 d0 d1 d1 d2 d2 d3 d3
  146. +    movq      m1, m0
  147. +    punpcklbw m0, m0             ; m0 = d0 x4, d1 x4 (used for rows 0..3)
  148. +    punpckhbw m1, m1             ; m1 = d2 x4, d3 x4 (used for rows 4..7)
  149. +    movq [r0+r1*1], m0           ; row 0
  150. +    movq [r0+r1*2], m0           ; row 1
  151. +    movq [r2+r1*1], m0           ; row 2
  152. +    movq [r2+r1*2], m0           ; row 3
  153. +    movq [r3+r1*1], m1           ; row 4
  154. +    movq [r3+r1*2], m1           ; row 5
  155. +    movq [r4+r1*1], m1           ; row 6
  156. +    movq [r4+r1*2], m1           ; row 7
  157. +    RET
  158. +
  159. +;-----------------------------------------------------------------------------
  160.  ; void pred8x8_dc_rv40(uint8_t *src, int stride)
  161.  ;-----------------------------------------------------------------------------
  162.  
  163. diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
  164. index 57a93f7..8f6fd2f 100644
  165. --- a/libavcodec/x86/h264_intrapred_init.c
  166. +++ b/libavcodec/x86/h264_intrapred_init.c
  167. @@ -46,6 +46,7 @@ void ff_pred16x16_tm_vp8_mmxext    (uint8_t *src, int stride);
  168.  void ff_pred16x16_tm_vp8_sse2      (uint8_t *src, int stride);
  169.  void ff_pred8x8_top_dc_mmxext      (uint8_t *src, int stride);
  170.  void ff_pred8x8_dc_rv40_mmxext     (uint8_t *src, int stride);
  171. +void ff_pred8x8_dc_mmxext          (uint8_t *src, int stride);
  172.  void ff_pred8x8_vertical_mmx       (uint8_t *src, int stride);
  173.  void ff_pred8x8_horizontal_mmx     (uint8_t *src, int stride);
  174.  void ff_pred8x8_horizontal_mmxext  (uint8_t *src, int stride);
  175. @@ -100,6 +101,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  176.              h->pred4x4  [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
  177.          if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
  178.              h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
  179. +            h->pred8x8[DC_PRED8x8     ] = ff_pred8x8_dc_mmxext;
  180.          }
  181.          if (codec_id == CODEC_ID_VP8) {
  182.              h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
  183. --
  184. 1.7.2.2
  185.  
  186.  
  187. From 8196e167180f448d637a19e32fb79c9a05c2b71d Mon Sep 17 00:00:00 2001
  188. From: Daniel Kang <daniel.d.kang@gmail.com>
  189. Date: Mon, 27 Dec 2010 17:56:00 -0500
  190. Subject: [PATCH 3/5] pred8x8l_top_dc_(mmx|ssse3)
  191.  
  192. ---
  193. libavcodec/x86/h264_intrapred.asm    |   83 +++++++++++++++++++++++++++++-----
  194.  libavcodec/x86/h264_intrapred_init.c |    4 ++
  195.  2 files changed, 75 insertions(+), 12 deletions(-)
  196.  
  197. diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
  198. index 61a587d..c1b7886 100644
  199. --- a/libavcodec/x86/h264_intrapred.asm
  200. +++ b/libavcodec/x86/h264_intrapred.asm
  201. @@ -20,6 +20,7 @@
  202.  ;******************************************************************************
  203.  
  204.  %include "x86inc.asm"
  205. +%include "x86util.asm"
  206.  
  207.  SECTION_RODATA
  208.  
  209. @@ -37,6 +38,7 @@ SECTION .text
  210.  
  211.  cextern pb_1
  212.  cextern pb_3
  213. +cextern pw_4
  214.  cextern pw_5
  215.  cextern pw_16
  216.  cextern pw_17
  217. @@ -1060,6 +1062,75 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6
  218.      jg .loop
  219.      REP_RET
  220.  
  221. +; args: dest, left, right, src, tmp (left, right and tmp are overwritten)
  222. +; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
  223. +%macro PRED4x4_LOWPASS 5
  224. +    mova    %5, %2               ; tmp = left, saved before left is overwritten
  225. +    pavgb   %2, %3               ; left = (left + right + 1) >> 1 per byte
  226. +    pxor    %3, %5               ; right = left ^ right
  227. +    mova    %1, %4               ; dest = src
  228. +    pand    %3, [pb_1]           ; presumably pb_1 is all 0x01 bytes: keeps bit 0, set where left+right is odd
  229. +    psubusb %2, %3               ; undo the round-up of pavgb: left = (left + right) >> 1
  230. +    pavgb   %1, %2               ; dest = (src + ((left + right) >> 1) + 1) >> 1
  231. +%endmacro
  232. +
  233. +;-----------------------------------------------------------------------------
  234. +; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
  235. +;-----------------------------------------------------------------------------
  236. +
  237. +%macro PRED8x8L_TOP_DC 1 ; %1 = instruction-set suffix of the generated function name
  238. +cglobal pred8x8l_top_dc_%1, 4,4
  239. +    sub          r0, r3            ; r0 = src - stride: the row above the block
  240. +    pxor        mm7, mm7           ; zero accumulator for the psadbw in .body
  241. +    movq        mm0, [r0-8]        ; 8 bytes preceding the top row
  242. +    movq        mm3, [r0]          ; top row, 'src' input of the lowpass
  243. +    movq        mm1, [r0+8]        ; 8 bytes following the top row
  244. +    movq        mm2, mm3
  245. +    movq        mm4, mm3
  246. +    PALIGNR     mm2, mm0, 7, mm0   ; mm2 becomes the 'left' (t[n-1]) input of the lowpass
  247. +    PALIGNR     mm1, mm4, 1, mm4   ; mm1 becomes the 'right' (t[n+1]) input of the lowpass
  248. +    test        r1d, r1d ; top_left (int argument: only the low 32 bits are meaningful)
  249. +    jz .fix_lt_2
  250. +    test        r2d, r2d ; top_right (int argument: only the low 32 bits are meaningful)
  251. +    jz .fix_tr_1
  252. +    jmp .body
  253. +.fix_lt_2:                         ; no top-left: overwrite the lowest byte of mm2 with that of mm3
  254. +    movq        mm5, mm3
  255. +    pxor        mm5, mm2
  256. +    psllq       mm5, 56
  257. +    psrlq       mm5, 56            ; only the lowest byte of (mm3 ^ mm2) survives
  258. +    pxor        mm2, mm5
  259. +    test        r2d, r2d ; top_right
  260. +    jnz .body
  261. +.fix_tr_1:                         ; no top-right: overwrite the highest byte of mm1 with that of mm3
  262. +    movq        mm5, mm3
  263. +    pxor        mm5, mm1
  264. +    psrlq       mm5, 56
  265. +    psllq       mm5, 56            ; only the highest byte of (mm3 ^ mm1) survives
  266. +    pxor        mm1, mm5
  267. +.body:
  268. +    PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = filtered top row
  269. +    psadbw   mm7, mm0              ; sum of the 8 filtered bytes
  270. +    paddw    mm7, [pw_4]           ; rounding term (presumably words of 4) ahead of the >> 3
  271. +    psrlw    mm7, 3
  272. +    pshufw   mm7, mm7, 0           ; broadcast the DC word
  273. +    packuswb mm7, mm7              ; 8 identical DC bytes
  274. +%rep 3
  275. +    movq [r0+r3*1], mm7            ; two rows per iteration
  276. +    movq [r0+r3*2], mm7
  277. +    lea    r0, [r0+r3*2]
  278. +%endrep
  279. +    movq [r0+r3*1], mm7            ; row 6
  280. +    movq [r0+r3*2], mm7            ; row 7
  281. +    RET
  282. +%endmacro
  283. +
  284. +INIT_MMX
  285. +%define PALIGNR PALIGNR_MMX
  286. +PRED8x8L_TOP_DC mmxext
  287. +%define PALIGNR PALIGNR_SSSE3
  288. +PRED8x8L_TOP_DC ssse3
  289. +
  290.  ;-----------------------------------------------------------------------------
  291.  ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
  292.  ;-----------------------------------------------------------------------------
  293. @@ -1167,18 +1238,6 @@ cglobal pred4x4_tm_vp8_ssse3, 3,3
  294.      movd [r1+r2*2], mm5
  295.      RET
  296.  
  297. -; dest, left, right, src, tmp
  298. -; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
  299. -%macro PRED4x4_LOWPASS 5
  300. -    mova    %5, %2
  301. -    pavgb   %2, %3
  302. -    pxor    %3, %5
  303. -    mova    %1, %4
  304. -    pand    %3, [pb_1]
  305. -    psubusb %2, %3
  306. -    pavgb   %1, %2
  307. -%endmacro
  308. -
  309.  ;-----------------------------------------------------------------------------
  310.  ; void pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
  311.  ;-----------------------------------------------------------------------------
  312. diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
  313. index 8f6fd2f..1a5c6de 100644
  314. --- a/libavcodec/x86/h264_intrapred_init.c
  315. +++ b/libavcodec/x86/h264_intrapred_init.c
  316. @@ -59,6 +59,8 @@ void ff_pred8x8_tm_vp8_mmx         (uint8_t *src, int stride);
  317.  void ff_pred8x8_tm_vp8_mmxext      (uint8_t *src, int stride);
  318.  void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
  319.  void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
  320. +void ff_pred8x8l_top_dc_mmxext     (uint8_t *src, int has_topleft, int has_topright, int stride);
  321. +void ff_pred8x8l_top_dc_ssse3      (uint8_t *src, int has_topleft, int has_topright, int stride);
  322.  void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
  323.  void ff_pred4x4_down_left_mmxext   (uint8_t *src, const uint8_t *topright, int stride);
  324.  void ff_pred4x4_tm_vp8_mmx         (uint8_t *src, const uint8_t *topright, int stride);
  325. @@ -96,6 +98,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  326.          h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
  327.          h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_mmxext;
  328.          h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
  329. +        h->pred8x8l[TOP_DC_PRED  ] = ff_pred8x8l_top_dc_mmxext;
  330.          h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
  331.          if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
  332.              h->pred4x4  [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
  333. @@ -146,6 +149,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  334.          h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3;
  335.          h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_ssse3;
  336.          h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
  337. +        h->pred8x8l[TOP_DC_PRED  ] = ff_pred8x8l_top_dc_ssse3;
  338.          if (codec_id == CODEC_ID_VP8) {
  339.              h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
  340.              h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_ssse3;
  341. --
  342. 1.7.2.2
  343.  
  344.  
  345. From 26d0c78dc0149a60d128765e48660f99c5a978f0 Mon Sep 17 00:00:00 2001
  346. From: Daniel Kang <daniel.d.kang@gmail.com>
  347. Date: Mon, 27 Dec 2010 18:35:22 -0500
  348. Subject: [PATCH 4/5] pred8x8l_dc_(mmx|ssse3)
  349.  
  350. ---
  351. libavcodec/x86/h264_intrapred.asm    |  104 ++++++++++++++++++++++++++++++++++
  352.  libavcodec/x86/h264_intrapred_init.c |    4 +
  353.  2 files changed, 108 insertions(+), 0 deletions(-)
  354.  
  355. diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
  356. index c1b7886..dcc8d67 100644
  357. --- a/libavcodec/x86/h264_intrapred.asm
  358. +++ b/libavcodec/x86/h264_intrapred.asm
  359. @@ -40,6 +40,7 @@ cextern pb_1
  360.  cextern pb_3
  361.  cextern pw_4
  362.  cextern pw_5
  363. +cextern pw_8
  364.  cextern pw_16
  365.  cextern pw_17
  366.  cextern pw_32
  367. @@ -1132,6 +1133,109 @@ PRED8x8L_TOP_DC mmxext
  368.  PRED8x8L_TOP_DC ssse3
  369.  
  370.  ;-----------------------------------------------------------------------------
  371. +;void pred8x8l_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
  372. +;-----------------------------------------------------------------------------
  373. +
  374. +%macro PRED8x8L_DC 1 ; %1 = instruction-set suffix of the generated function name
  375. +cglobal pred8x8l_dc_%1, 4,5
  376. +    sub          r0, r3            ; r0 = src - stride: the row above the block
  377. +    lea          r4, [r0+r3*2]
  378. +    movq        mm0, [r0+r3*1-8]   ; 8 bytes ending at the left neighbour of row 0
  379. +    punpckhbw   mm0, [r0+r3*0-8]   ; paired with the 8 bytes ending at the top-left pixel
  380. +    movq        mm1, [r4+r3*1-8]   ; ... left neighbour of row 2
  381. +    punpckhbw   mm1, [r0+r3*2-8]   ; ... left neighbour of row 1
  382. +    mov          r4, r0            ; remember src - stride; copied back to r0 below
  383. +    punpckhwd   mm1, mm0
  384. +    lea          r0, [r0+r3*4]
  385. +    movq        mm2, [r0+r3*1-8]   ; ... left neighbour of row 4
  386. +    punpckhbw   mm2, [r0+r3*0-8]   ; ... left neighbour of row 3
  387. +    lea          r0, [r0+r3*2]
  388. +    movq        mm3, [r0+r3*1-8]   ; ... left neighbour of row 6
  389. +    punpckhbw   mm3, [r0+r3*0-8]   ; ... left neighbour of row 5
  390. +    punpckhwd   mm3, mm2
  391. +    punpckhdq   mm3, mm1           ; mm3 bytes, low to high: left of rows 6..0, then top-left
  392. +    lea          r0, [r0+r3*2]
  393. +    movq        mm0, [r0+r3*0-8]   ; 8 bytes ending at the left neighbour of row 7
  394. +    movq        mm1, [r4]          ; top row
  395. +    mov          r0, r4            ; r0 = src - stride again
  396. +    movq        mm4, mm3
  397. +    movq        mm2, mm3
  398. +    PALIGNR     mm4, mm0, 7, mm0   ; mm4: left column combined with the row 7 bytes
  399. +    PALIGNR     mm1, mm2, 1, mm2   ; mm1: left column combined with the top row bytes
  400. +    test       r1d, r1d            ; has_topleft (int argument: only the low 32 bits are meaningful)
  401. +    jnz .do_left
  402. +.fix_lt_1:                         ; no top-left: patch byte 6 of mm1 using the top bytes of mm3 and mm4
  403. +    movq        mm5, mm3
  404. +    pxor        mm5, mm4
  405. +    psrlq       mm5, 56
  406. +    psllq       mm5, 48            ; highest byte of (mm3 ^ mm4), moved to byte 6
  407. +    pxor        mm1, mm5
  408. +    jmp .do_left
  409. +.fix_lt_2:                         ; no top-left: overwrite the lowest byte of mm2 with that of mm3
  410. +    movq        mm5, mm3
  411. +    pxor        mm5, mm2
  412. +    psllq       mm5, 56
  413. +    psrlq       mm5, 56            ; only the lowest byte of (mm3 ^ mm2) survives
  414. +    pxor        mm2, mm5
  415. +    test        r2d, r2d           ; has_topright (int argument)
  416. +    jnz .body
  417. +.fix_tr_1:                         ; no top-right: overwrite the highest byte of mm1 with that of mm3
  418. +    movq        mm5, mm3
  419. +    pxor        mm5, mm1
  420. +    psrlq       mm5, 56
  421. +    psllq       mm5, 56            ; only the highest byte of (mm3 ^ mm1) survives
  422. +    pxor        mm1, mm5
  423. +    jmp .body
  424. +.do_left:
  425. +    movq        mm0, mm4           ; save mm4: the lowpass macro overwrites its 'right' argument
  426. +    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
  427. +    movq        mm4, mm0
  428. +    movq        mm7, mm2
  429. +    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
  430. +    psllq       mm1, 56
  431. +    PALIGNR     mm7, mm1, 7, mm3   ; mm7 = filtered left column, summed in .body
  432. +    movq        mm0, [r0-8]        ; 8 bytes preceding the top row
  433. +    movq        mm3, [r0]          ; top row, 'src' input of the lowpass in .body
  434. +    movq        mm1, [r0+8]        ; 8 bytes following the top row
  435. +    movq        mm2, mm3
  436. +    movq        mm4, mm3
  437. +    PALIGNR     mm2, mm0, 7, mm0   ; mm2 becomes the 'left' (t[n-1]) input of the lowpass
  438. +    PALIGNR     mm1, mm4, 1, mm4   ; mm1 becomes the 'right' (t[n+1]) input of the lowpass
  439. +    test        r1d, r1d           ; has_topleft (int argument)
  440. +    jz .fix_lt_2
  441. +    test        r2d, r2d           ; has_topright (int argument)
  442. +    jz .fix_tr_1
  443. +.body:
  444. +    lea          r1, [r0+r3*2]     ; r1 (has_topleft no longer needed) = src + 1*stride
  445. +    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = filtered top row
  446. +    pxor        mm0, mm0
  447. +    pxor        mm1, mm1
  448. +    lea          r2, [r1+r3*2]     ; r2 = src + 3*stride
  449. +    psadbw      mm0, mm7           ; sum of the 8 bytes of mm7
  450. +    psadbw      mm1, mm6           ; sum of the 8 bytes of mm6
  451. +    paddw       mm0, [pw_8]        ; rounding term (presumably words of 8) ahead of the >> 4
  452. +    paddw       mm0, mm1
  453. +    lea          r4, [r2+r3*2]     ; r4 = src + 5*stride
  454. +    psrlw       mm0, 4
  455. +    pshufw      mm0, mm0, 0        ; broadcast the DC word
  456. +    packuswb    mm0, mm0           ; 8 identical DC bytes
  457. +    movq [r0+r3*1], mm0            ; row 0
  458. +    movq [r0+r3*2], mm0            ; row 1
  459. +    movq [r1+r3*1], mm0            ; row 2
  460. +    movq [r1+r3*2], mm0            ; row 3
  461. +    movq [r2+r3*1], mm0            ; row 4
  462. +    movq [r2+r3*2], mm0            ; row 5
  463. +    movq [r4+r3*1], mm0            ; row 6
  464. +    movq [r4+r3*2], mm0            ; row 7
  465. +    RET
  466. +%endmacro
  467. +INIT_MMX
  468. +%define PALIGNR PALIGNR_MMX
  469. +PRED8x8L_DC mmxext
  470. +%define PALIGNR PALIGNR_SSSE3
  471. +PRED8x8L_DC ssse3
  472. +
  473. +;-----------------------------------------------------------------------------
  474.  ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
  475.  ;-----------------------------------------------------------------------------
  476.  
  477. diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
  478. index 1a5c6de..b8c7ff1 100644
  479. --- a/libavcodec/x86/h264_intrapred_init.c
  480. +++ b/libavcodec/x86/h264_intrapred_init.c
  481. @@ -61,6 +61,8 @@ void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
  482.  void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
  483.  void ff_pred8x8l_top_dc_mmxext     (uint8_t *src, int has_topleft, int has_topright, int stride);
  484.  void ff_pred8x8l_top_dc_ssse3      (uint8_t *src, int has_topleft, int has_topright, int stride);
  485. +void ff_pred8x8l_dc_mmxext         (uint8_t *src, int has_topleft, int has_topright, int stride);
  486. +void ff_pred8x8l_dc_ssse3          (uint8_t *src, int has_topleft, int has_topright, int stride);
  487.  void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
  488.  void ff_pred4x4_down_left_mmxext   (uint8_t *src, const uint8_t *topright, int stride);
  489.  void ff_pred4x4_tm_vp8_mmx         (uint8_t *src, const uint8_t *topright, int stride);
  490. @@ -99,6 +101,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  491.          h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_mmxext;
  492.          h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
  493.          h->pred8x8l[TOP_DC_PRED  ] = ff_pred8x8l_top_dc_mmxext;
  494. +        h->pred8x8l[DC_PRED      ] = ff_pred8x8l_dc_mmxext;
  495.          h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
  496.          if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
  497.              h->pred4x4  [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
  498. @@ -150,6 +153,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  499.          h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_ssse3;
  500.          h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
  501.          h->pred8x8l[TOP_DC_PRED  ] = ff_pred8x8l_top_dc_ssse3;
  502. +        h->pred8x8l[DC_PRED      ] = ff_pred8x8l_dc_ssse3;
  503.          if (codec_id == CODEC_ID_VP8) {
  504.              h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
  505.              h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_ssse3;
  506. --
  507. 1.7.2.2
  508.  
  509.  
  510. From 9535a1017f05a108370bfd9a4bf28a0dfaf89bab Mon Sep 17 00:00:00 2001
  511. From: Daniel Kang <daniel.d.kang@gmail.com>
  512. Date: Mon, 27 Dec 2010 21:05:24 -0500
  513. Subject: [PATCH 5/5] pred8x8l_horizontal_(mmx|ssse3)
  514.  
  515. ---
  516. libavcodec/x86/h264_intrapred.asm    |   78 ++++++++++++++++++++++++++++++++++
  517.  libavcodec/x86/h264_intrapred_init.c |    4 ++
  518.  2 files changed, 82 insertions(+), 0 deletions(-)
  519.  
  520. diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
  521. index dcc8d67..ba27e0f 100644
  522. --- a/libavcodec/x86/h264_intrapred.asm
  523. +++ b/libavcodec/x86/h264_intrapred.asm
  524. @@ -1236,6 +1236,84 @@ PRED8x8L_DC mmxext
  525.  PRED8x8L_DC ssse3
  526.  
  527.  ;-----------------------------------------------------------------------------
  528. +; void pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright, int stride)
  529. +;-----------------------------------------------------------------------------
  530. +
  531. +%macro PRED8x8L_HORIZONTAL 1 ; %1 = instruction-set suffix of the generated function name
  532. +cglobal pred8x8l_horizontal_%1, 4,4
  533. +    sub          r0, r3            ; r0 = src - stride: the row above the block
  534. +    lea          r2, [r0+r3*2]     ; has_topright (r2) is never tested here; r2 is used as scratch
  535. +    movq        mm0, [r0+r3*1-8]   ; 8 bytes ending at the left neighbour of row 0
  536. +    punpckhbw   mm0, [r0+r3*0-8]   ; paired with the 8 bytes ending at the top-left pixel
  537. +    movq        mm1, [r2+r3*1-8]   ; ... left neighbour of row 2
  538. +    punpckhbw   mm1, [r0+r3*2-8]   ; ... left neighbour of row 1
  539. +    mov          r2, r0            ; remember src - stride; copied back to r0 below
  540. +    punpckhwd   mm1, mm0
  541. +    lea          r0, [r0+r3*4]
  542. +    movq        mm2, [r0+r3*1-8]   ; ... left neighbour of row 4
  543. +    punpckhbw   mm2, [r0+r3*0-8]   ; ... left neighbour of row 3
  544. +    lea          r0, [r0+r3*2]
  545. +    movq        mm3, [r0+r3*1-8]   ; ... left neighbour of row 6
  546. +    punpckhbw   mm3, [r0+r3*0-8]   ; ... left neighbour of row 5
  547. +    punpckhwd   mm3, mm2
  548. +    punpckhdq   mm3, mm1           ; mm3 bytes, low to high: left of rows 6..0, then top-left
  549. +    lea          r0, [r0+r3*2]
  550. +    movq        mm0, [r0+r3*0-8]   ; 8 bytes ending at the left neighbour of row 7
  551. +    movq        mm1, [r2]          ; top row
  552. +    mov          r0, r2            ; r0 = src - stride again
  553. +    movq        mm4, mm3
  554. +    movq        mm2, mm3
  555. +    PALIGNR     mm4, mm0, 7, mm0   ; mm4: left column combined with the row 7 bytes
  556. +    PALIGNR     mm1, mm2, 1, mm2   ; mm1: left column combined with the top row bytes
  557. +    test       r1d, r1d ; top_left (int argument: only the low 32 bits are meaningful)
  558. +    jnz .do_left
  559. +.fix_lt_1:                         ; no top-left: patch byte 6 of mm1 using the top bytes of mm3 and mm4
  560. +    movq        mm5, mm3
  561. +    pxor        mm5, mm4
  562. +    psrlq       mm5, 56
  563. +    psllq       mm5, 48            ; highest byte of (mm3 ^ mm4), moved to byte 6
  564. +    pxor        mm1, mm5
  565. +.do_left:
  566. +    movq        mm0, mm4           ; save mm4: the lowpass macro overwrites its 'right' argument
  567. +    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
  568. +    movq        mm4, mm0
  569. +    movq        mm7, mm2
  570. +    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
  571. +    psllq       mm1, 56
  572. +    PALIGNR     mm7, mm1, 7, mm3   ; mm7 = filtered left column
  573. +    movq        mm3, mm7
  574. +    lea         r1, [r0+r3*2]      ; r1 (has_topleft no longer needed) = src + 1*stride
  575. +    movq       mm7, mm3            ; NOTE: redundant, mm7 already holds this value
  576. +    punpckhbw  mm3, mm3            ; bytes 4..7 of the column, each duplicated into a word
  577. +    punpcklbw  mm7, mm7            ; bytes 0..3 of the column, each duplicated into a word
  578. +    pshufw     mm0, mm3, 0xff      ; row 0 fill: column byte 7 broadcast
  579. +    pshufw     mm1, mm3, 0xaa      ; row 1 fill: column byte 6
  580. +    lea         r2, [r1+r3*2]      ; r2 = src + 3*stride
  581. +    pshufw     mm2, mm3, 0x55      ; row 2 fill: column byte 5
  582. +    pshufw     mm3, mm3, 0x00      ; row 3 fill: column byte 4
  583. +    pshufw     mm4, mm7, 0xff      ; row 4 fill: column byte 3
  584. +    pshufw     mm5, mm7, 0xaa      ; row 5 fill: column byte 2
  585. +    pshufw     mm6, mm7, 0x55      ; row 6 fill: column byte 1
  586. +    pshufw     mm7, mm7, 0x00      ; row 7 fill: column byte 0
  587. +    movq [r0+r3*1], mm0            ; row 0
  588. +    movq [r0+r3*2], mm1            ; row 1
  589. +    movq [r1+r3*1], mm2            ; row 2
  590. +    movq [r1+r3*2], mm3            ; row 3
  591. +    movq [r2+r3*1], mm4            ; row 4
  592. +    movq [r2+r3*2], mm5            ; row 5
  593. +    lea         r0, [r2+r3*2]      ; r0 = src + 5*stride
  594. +    movq [r0+r3*1], mm6            ; row 6
  595. +    movq [r0+r3*2], mm7            ; row 7
  596. +    RET
  597. +%endmacro
  598. +
  599. +INIT_MMX
  600. +%define PALIGNR PALIGNR_MMX
  601. +PRED8x8L_HORIZONTAL mmxext
  602. +%define PALIGNR PALIGNR_SSSE3
  603. +PRED8x8L_HORIZONTAL ssse3
  604. +
  605. +;-----------------------------------------------------------------------------
  606.  ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
  607.  ;-----------------------------------------------------------------------------
  608.  
  609. diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
  610. index b8c7ff1..a22c5cd 100644
  611. --- a/libavcodec/x86/h264_intrapred_init.c
  612. +++ b/libavcodec/x86/h264_intrapred_init.c
  613. @@ -63,6 +63,8 @@ void ff_pred8x8l_top_dc_mmxext     (uint8_t *src, int has_topleft, int has_topri
  614.  void ff_pred8x8l_top_dc_ssse3      (uint8_t *src, int has_topleft, int has_topright, int stride);
  615.  void ff_pred8x8l_dc_mmxext         (uint8_t *src, int has_topleft, int has_topright, int stride);
  616.  void ff_pred8x8l_dc_ssse3          (uint8_t *src, int has_topleft, int has_topright, int stride);
  617. +void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
  618. +void ff_pred8x8l_horizontal_ssse3  (uint8_t *src, int has_topleft, int has_topright, int stride);
  619.  void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
  620.  void ff_pred4x4_down_left_mmxext   (uint8_t *src, const uint8_t *topright, int stride);
  621.  void ff_pred4x4_tm_vp8_mmx         (uint8_t *src, const uint8_t *topright, int stride);
  622. @@ -102,6 +104,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  623.          h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
  624.          h->pred8x8l[TOP_DC_PRED  ] = ff_pred8x8l_top_dc_mmxext;
  625.          h->pred8x8l[DC_PRED      ] = ff_pred8x8l_dc_mmxext;
  626. +        h->pred8x8l[HOR_PRED     ] = ff_pred8x8l_horizontal_mmxext;
  627.          h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
  628.          if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
  629.              h->pred4x4  [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
  630. @@ -154,6 +157,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
  631.          h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
  632.          h->pred8x8l[TOP_DC_PRED  ] = ff_pred8x8l_top_dc_ssse3;
  633.          h->pred8x8l[DC_PRED      ] = ff_pred8x8l_dc_ssse3;
  634. +        h->pred8x8l[HOR_PRED     ] = ff_pred8x8l_horizontal_ssse3;
  635.          if (codec_id == CODEC_ID_VP8) {
  636.              h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
  637.              h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_ssse3;
  638. --
  639. 1.7.2.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement