Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 7d462192dfb66c6b2b3bdbaa841a6a69e5b7848e Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 14:32:11 -0500
- Subject: [PATCH 01/15] pred8x8l_top_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 78 ++++++++++++++++++++++++++++-----
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 68 insertions(+), 12 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 14a6038..b21516b 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -20,6 +20,7 @@
- ;******************************************************************************
- %include "x86inc.asm"
- +%include "x86util.asm"
- SECTION_RODATA
- @@ -37,6 +38,7 @@ SECTION .text
- cextern pb_1
- cextern pb_3
- +cextern pw_4
- cextern pw_5
- cextern pw_16
- cextern pw_17
- @@ -827,6 +829,70 @@ PRED8x8_H mmx
- PRED8x8_H mmxext
- PRED8x8_H ssse3
- +; dest, left, right, src, tmp (left, right and tmp are overwritten)
- +; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- +%macro PRED4x4_LOWPASS 5
- + mova %5, %2 ; tmp = left, saved because pavgb overwrites %2
- + pavgb %2, %3 ; %2 = (left + right + 1) >> 1 per byte
- + pxor %3, %5 ; %3 = left ^ right
- + mova %1, %4 ; dest = src
- + pand %3, [pb_1] ; keep bit 0 of each byte (assumes pb_1 is all 0x01 bytes): 1 where pavgb rounded up
- + psubusb %2, %3 ; undo that rounding: %2 = (left + right) >> 1
- + pavgb %1, %2 ; dest = (src + %2 + 1) >> 1 = (left + 2*src + right + 2) >> 2
- +%endmacro
- +
- +;-----------------------------------------------------------------------------
- +; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_top_dc_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride: the row above the block (r1 = has_topleft, r2 = has_topright, r3 = stride)
- + movq mm0, [r0-8] ; 8 bytes preceding the top row; byte 7 is the top-left pixel
- + movq mm3, [r0] ; t0..t7: the top row
- + movq mm1, [r0+8] ; 8 bytes following the top row (top-right)
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = topleft, t0..t6 (PALIGNR_MMX comes from x86util.asm, not shown)
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = t1..t7, first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2 ; top-left missing: patch the left-neighbour vector
- + test r2, r2 ; top_right
- + jz .fix_tr_1 ; top-right missing: patch the right-neighbour vector
- +.do_top:
- + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = low-pass filtered top row
- + jmp .body
- +.fix_lt_2:
- + movq mm5, mm3
- + pxor mm5, mm2 ; difference between top row and left-neighbour vector
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of the difference
- + pxor mm2, mm5 ; byte 0 of mm2 now equals byte 0 of mm3 (t0)
- + test r2, r2 ; top_right
- + jnz .do_top ; falls through to .fix_tr_1 when top-right is missing too
- +.fix_tr_1:
- + movq mm5, mm3
- + pxor mm5, mm1 ; difference between top row and right-neighbour vector
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of the difference
- + pxor mm1, mm5 ; byte 7 of mm1 now equals byte 7 of mm3 (t7)
- + jmp .do_top
- +.body:
- + pxor mm1, mm1
- + psadbw mm1, mm0 ; low word = sum of the 8 filtered top bytes
- + paddw mm1, [pw_4] ; rounding term (assumes pw_4 holds words of 4)
- + psrlw mm1, 3 ; dc = (sum + 4) >> 3
- + pshufw mm1, mm1, 0 ; broadcast the dc word
- + packuswb mm1, mm1 ; dc in all 8 bytes
- +%rep 3
- + movq [r0+r3*1], mm1 ; two rows per iteration
- + movq [r0+r3*2], mm1
- + lea r0, [r0+r3*2] ; advance two rows
- +%endrep
- + movq [r0+r3*1], mm1 ; rows 6 and 7
- + movq [r0+r3*2], mm1
- + RET
- +
- ;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- @@ -1073,18 +1139,6 @@ cglobal pred4x4_tm_vp8_ssse3, 3,3
- movd [r1+r2*2], mm5
- RET
- -; dest, left, right, src, tmp
- -; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- -%macro PRED4x4_LOWPASS 5
- - mova %5, %2
- - pavgb %2, %3
- - pxor %3, %5
- - mova %1, %4
- - pand %3, [pb_1]
- - psubusb %2, %3
- - pavgb %1, %2
- -%endmacro
- -
- ;-----------------------------------------------------------------------------
- ; void pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 10a6dd6..aba02ce 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -57,6 +57,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -94,6 +95,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- + h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 30e18a52fb0409fd02de4f25e26285084fbb304f Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 14:49:27 -0500
- Subject: [PATCH 02/15] pred8x8l_vertical_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 46 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 48 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index b21516b..62a16ff 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -842,6 +842,52 @@ PRED8x8_H ssse3
- %endmacro
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_vertical_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_vertical_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride: the row above the block (r1 = has_topleft, r2 = has_topright, r3 = stride)
- + movq mm0, [r0-8] ; 8 bytes preceding the top row; byte 7 is the top-left pixel
- + movq mm3, [r0] ; t0..t7: the top row
- + movq mm1, [r0+8] ; 8 bytes following the top row (top-right)
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = topleft, t0..t6 (PALIGNR_MMX comes from x86util.asm, not shown)
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = t1..t7, first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2 ; top-left missing: patch the left-neighbour vector
- + test r2, r2 ; top_right
- + jz .fix_tr_1 ; top-right missing: patch the right-neighbour vector
- +.do_top:
- + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = low-pass filtered top row
- + jmp .body
- +.fix_lt_2:
- + movq mm5, mm3
- + pxor mm5, mm2 ; difference between top row and left-neighbour vector
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of the difference
- + pxor mm2, mm5 ; byte 0 of mm2 now equals byte 0 of mm3 (t0)
- + test r2, r2 ; top_right
- + jnz .do_top ; falls through to .fix_tr_1 when top-right is missing too
- +.fix_tr_1:
- + movq mm5, mm3
- + pxor mm5, mm1 ; difference between top row and right-neighbour vector
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of the difference
- + pxor mm1, mm5 ; byte 7 of mm1 now equals byte 7 of mm3 (t7)
- + jmp .do_top
- +.body:
- +%rep 3
- + movq [r0+r3*1], mm0 ; copy the filtered top row into two rows per iteration
- + movq [r0+r3*2], mm0
- + lea r0, [r0+r3*2] ; advance two rows
- +%endrep
- + movq [r0+r3*1], mm0 ; rows 6 and 7
- + movq [r0+r3*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- INIT_MMX
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index aba02ce..79bdaec 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -58,6 +58,7 @@ void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -96,6 +97,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- + h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 5fda00845e1c491ea8706782885737cc0ad15d7e Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 16:21:59 -0500
- Subject: [PATCH 03/15] pred8x8_top_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 28 ++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 3 +++
- 2 files changed, 31 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 62a16ff..3c39b71 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -940,6 +940,34 @@ cglobal pred8x8l_top_dc_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
- +;-----------------------------------------------------------------------------
- +cglobal pred8x8_top_dc_mmxext, 2,2
- + sub r0, r1 ; r0 = src - stride: the row above the block (r1 = stride)
- + movq mm0, [r0] ; t0..t7: the top row
- + pxor mm1, mm1
- + pxor mm2, mm2 ; mm2 = 0, used by the unpack and by psadbw/pavgw below
- + punpckhbw mm1, mm0 ; t4..t7 in the high byte of each word, low bytes zero
- + punpcklbw mm0, mm2 ; t0..t3 zero-extended to words
- + psadbw mm1, mm2 ; s1 = t4+t5+t6+t7
- + psadbw mm0, mm2 ; s0 = t0+t1+t2+t3
- + psrlw mm1, 1
- + psrlw mm0, 1
- + pavgw mm1, mm2 ; ((s1 >> 1) + 1) >> 1, i.e. (s1 + 2) >> 2
- + pavgw mm0, mm2 ; same rounding for s0
- + pshufw mm1, mm1, 0 ; dc1 (w)
- + pshufw mm0, mm0, 0 ; dc0 (w)
- + packuswb mm0, mm1 ; dc0,dc1 (b): left 4 columns get dc0, right 4 get dc1
- +%rep 3
- + movq [r0+r1*1], mm0 ; two rows per iteration
- + movq [r0+r1*2], mm0
- + lea r0, [r0+r1*2] ; advance two rows
- +%endrep
- + movq [r0+r1*1], mm0 ; rows 6 and 7
- + movq [r0+r1*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 79bdaec..dfa3b3f 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -49,6 +49,7 @@ void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8_top_dc_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_plane_mmx (uint8_t *src, int stride);
- void ff_pred8x8_plane_mmx2 (uint8_t *src, int stride);
- void ff_pred8x8_plane_sse2 (uint8_t *src, int stride);
- @@ -101,6 +102,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- + if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264)
- + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
- h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
- --
- 1.7.2.2
- From 8d6b2365fa48e6dab2957e81702297d56ee67429 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 17:42:43 -0500
- Subject: [PATCH 04/15] pred8x8l_horizontal_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 75 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 77 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 3c39b71..5c5af03 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -968,6 +968,81 @@ cglobal pred8x8_top_dc_mmxext, 2,2
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_horizontal_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_horizontal_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride: the row above the block (r1 = has_topleft, r3 = stride)
- + lea r2, [r0+r3*2] ; r2 -> block row 1; has_topright is never read, r2 is scratch
- + movq mm0, [r0+r3*1-8] ; 8 bytes left of row 0; byte 7 is l0
- + punpckhbw mm0, [r0+r3*0-8] ; interleave with the bytes left of the row above: top word = l0, topleft
- + movq mm1, [r2+r3*1-8] ; 8 bytes left of row 2
- + punpckhbw mm1, [r0+r3*2-8] ; interleave with row 1: top word = l2, l1
- + mov r2, r0 ; remember the row-above pointer
- + punpckhwd mm1, mm0 ; top dword = l2 l1 l0 topleft
- + lea r0, [r0+r3*4] ; r0 -> block row 3
- + movq mm2, [r0+r3*1-8] ; 8 bytes left of row 4
- + punpckhbw mm2, [r0+r3*0-8] ; interleave with row 3: top word = l4, l3
- + lea r0, [r0+r3*2] ; r0 -> block row 5
- + movq mm3, [r0+r3*1-8] ; 8 bytes left of row 6
- + punpckhbw mm3, [r0+r3*0-8] ; interleave with row 5: top word = l6, l5
- + punpckhwd mm3, mm2 ; top dword = l6 l5 l4 l3
- + punpckhdq mm3, mm1 ; mm3 = l6 l5 l4 l3 l2 l1 l0 topleft (byte 0 first)
- + lea r0, [r0+r3*2] ; r0 -> block row 7
- + movq mm0, [r0+r3*0-8] ; 8 bytes left of row 7; byte 7 is l7
- + movq mm1, [r2] ; top row, byte 0 is t0
- + mov r0, r2 ; r0 -> row above the block again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; presumably mm4 = l7 l6 .. l0 (PALIGNR_MMX comes from x86util.asm, not shown)
- + PALIGNR mm1, mm2, 1, mm2 ; presumably mm1 = l5 .. l0 topleft t0
- + test r1, r1 ; top_left
- + jz .fix_lt_1 ; top-left missing: patch mm1 before filtering
- +.do_left:
- + movq mm0, mm4 ; keep a copy: PRED4x4_LOWPASS overwrites its 2nd and 3rd operands
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = low-pass of mm3 with neighbours mm1 / mm4
- + movq mm4, mm0 ; restore mm4
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass over mm4; only its byte 0 is used below
- + psllq mm1, 56 ; move that byte to byte 7
- + PALIGNR mm7, mm1, 7, mm3 ; presumably mm7 = that byte followed by bytes 0..6 of the first pass
- + movq mm3, mm7
- + jmp .body
- +.fix_lt_1:
- + movq mm5, mm3
- + pxor mm5, mm4 ; difference between mm3 and mm4
- + psrlq mm5, 56 ; keep only byte 7 of the difference ...
- + psllq mm5, 48 ; ... and move it to byte 6, which the pxor below flips in mm1
- + pxor mm1, mm5
- + jmp .do_left
- +.body
- + movq mm7, mm3
- + punpckhbw mm3, mm3
- + punpcklbw mm7, mm7
- + pshufw mm0, mm3, 0xff
- + pshufw mm1, mm3, 0xaa
- + pshufw mm2, mm3, 0x55
- + pshufw mm3, mm3, 0x00
- + pshufw mm4, mm7, 0xff
- + pshufw mm5, mm7, 0xaa
- + pshufw mm6, mm7, 0x55
- + pshufw mm7, mm7, 0x00
- + lea r1, [r0+r3*2]
- + lea r2, [r1+r3*2]
- + movq [r0+r3*1], mm0
- + movq [r0+r3*2], mm1
- + movq [r1+r3*1], mm2
- + movq [r1+r3*2], mm3
- + movq [r2+r3*1], mm4
- + movq [r2+r3*2], mm5
- + lea r0, [r2+r3*2]
- + movq [r0+r3*1], mm6
- + movq [r0+r3*2], mm7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index dfa3b3f..98a906f 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -60,6 +60,7 @@ void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -99,6 +100,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 1713c97a216d9127e6e29d1edddc62dacde6608b Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 19:45:23 -0500
- Subject: [PATCH 05/15] pred8x8l_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 98 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 100 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 5c5af03..dfe381c 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -40,6 +40,7 @@ cextern pb_1
- cextern pb_3
- cextern pw_4
- cextern pw_5
- +cextern pw_8
- cextern pw_16
- cextern pw_17
- cextern pw_32
- @@ -888,6 +889,103 @@ cglobal pred8x8l_vertical_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_dc_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_dc_mmxext, 4,5
- + sub r0, r3 ; r0 = src - stride: the row above the block (r1 = has_topleft, r2 = has_topright, r3 = stride)
- + lea r4, [r0+r3*2] ; r4 -> block row 1
- + movq mm0, [r0+r3*1-8] ; 8 bytes left of row 0; byte 7 is l0
- + punpckhbw mm0, [r0+r3*0-8] ; interleave with the bytes left of the row above: top word = l0, topleft
- + movq mm1, [r4+r3*1-8] ; 8 bytes left of row 2
- + punpckhbw mm1, [r0+r3*2-8] ; interleave with row 1: top word = l2, l1
- + mov r4, r0 ; remember the row-above pointer
- + punpckhwd mm1, mm0 ; top dword = l2 l1 l0 topleft
- + lea r0, [r0+r3*4] ; r0 -> block row 3
- + movq mm2, [r0+r3*1-8] ; 8 bytes left of row 4
- + punpckhbw mm2, [r0+r3*0-8] ; interleave with row 3: top word = l4, l3
- + lea r0, [r0+r3*2] ; r0 -> block row 5
- + movq mm3, [r0+r3*1-8] ; 8 bytes left of row 6
- + punpckhbw mm3, [r0+r3*0-8] ; interleave with row 5: top word = l6, l5
- + punpckhwd mm3, mm2 ; top dword = l6 l5 l4 l3
- + punpckhdq mm3, mm1 ; mm3 = l6 l5 l4 l3 l2 l1 l0 topleft (byte 0 first)
- + lea r0, [r0+r3*2] ; r0 -> block row 7
- + movq mm0, [r0+r3*0-8] ; 8 bytes left of row 7; byte 7 is l7
- + movq mm1, [r4] ; top row, byte 0 is t0
- + mov r0, r4 ; r0 -> row above the block again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; presumably mm4 = l7 l6 .. l0 (PALIGNR_MMX comes from x86util.asm, not shown)
- + PALIGNR mm1, mm2, 1, mm2 ; presumably mm1 = l5 .. l0 topleft t0
- + test r1, r1 ; top_left
- + jz .fix_lt_1 ; top-left missing: patch mm1 before filtering
- +.do_left:
- + movq mm0, mm4 ; keep a copy: PRED4x4_LOWPASS overwrites its 2nd and 3rd operands
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = low-pass of mm3 with neighbours mm1 / mm4
- + movq mm4, mm0 ; restore mm4
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass over mm4; only its byte 0 is used below
- + psllq mm1, 56 ; move that byte to byte 7
- + PALIGNR mm7, mm1, 7, mm3 ; presumably mm7 = the 8 filtered left pixels
- +.check_top:
- + movq mm0, [r0-8] ; 8 bytes preceding the top row; byte 7 is the top-left pixel
- + movq mm3, [r0] ; t0..t7: the top row
- + movq mm1, [r0+8] ; 8 bytes following the top row (top-right)
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = topleft, t0..t6
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = t1..t7, first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2 ; top-left missing: patch the left-neighbour vector
- + test r2, r2 ; top_right
- + jz .fix_tr_1 ; top-right missing: patch the right-neighbour vector
- +.do_top:
- + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = low-pass filtered top row
- + jmp .body
- +.fix_lt_1:
- + movq mm5, mm3
- + pxor mm5, mm4 ; difference between mm3 and mm4
- + psrlq mm5, 56 ; keep only byte 7 of the difference ...
- + psllq mm5, 48 ; ... and move it to byte 6
- + pxor mm1, mm5 ; flip byte 6 of mm1 by that difference
- + jmp .do_left
- +.fix_lt_2:
- + movq mm5, mm3
- + pxor mm5, mm2 ; difference between top row and left-neighbour vector
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of the difference
- + pxor mm2, mm5 ; byte 0 of mm2 now equals byte 0 of mm3 (t0)
- + test r2, r2 ; top_right
- + jnz .do_top ; falls through to .fix_tr_1 when top-right is missing too
- +.fix_tr_1:
- + movq mm5, mm3
- + pxor mm5, mm1 ; difference between top row and right-neighbour vector
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of the difference
- + pxor mm1, mm5 ; byte 7 of mm1 now equals byte 7 of mm3 (t7)
- + jmp .do_top
- +.body:
- + pxor mm0, mm0
- + pxor mm1, mm1
- + psadbw mm0, mm7 ; sum of the 8 bytes of mm7 (filtered left column)
- + psadbw mm1, mm6 ; sum of the 8 bytes of mm6 (filtered top row)
- + paddw mm0, [pw_8] ; rounding term (assumes pw_8 holds words of 8)
- + paddw mm0, mm1
- + psrlw mm0, 4 ; dc = (left sum + top sum + 8) >> 4
- + pshufw mm0, mm0, 0 ; broadcast the dc word
- + packuswb mm0, mm0 ; dc in all 8 bytes
- +%rep 3
- + movq [r0+r3*1], mm0 ; two rows per iteration
- + movq [r0+r3*2], mm0
- + lea r0, [r0+r3*2] ; advance two rows
- +%endrep
- + movq [r0+r3*1], mm0 ; rows 6 and 7
- + movq [r0+r3*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- INIT_MMX
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 98a906f..b5e6c02 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -58,6 +58,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- @@ -98,6 +99,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- + h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- --
- 1.7.2.2
- From 2334210d134a7e62838f011484738f6548db361d Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 21:06:06 -0500
- Subject: [PATCH 06/15] pred8x8l_horizontal_up_mmxext
- ---
- libavcodec/h264.c | 2 +
- libavcodec/x86/h264_intrapred.asm | 96 +++++++++++++++++++++++++++++++++-
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 98 insertions(+), 2 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index 318c1c8..cbcbc42 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1190,8 +1190,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
- }else{
- const int nnz = h->non_zero_count_cache[ scan8[i] ];
- +START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- +if (dir == HOR_UP_PRED) { STOP_TIMER("pred8x8l_horizontal_up"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index dfe381c..901823c 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1116,19 +1116,19 @@ cglobal pred8x8l_horizontal_mmxext, 4,4
- pxor mm1, mm5
- jmp .do_left
- .body
- + lea r1, [r0+r3*2]
- movq mm7, mm3
- punpckhbw mm3, mm3
- punpcklbw mm7, mm7
- pshufw mm0, mm3, 0xff
- pshufw mm1, mm3, 0xaa
- + lea r2, [r1+r3*2]
- pshufw mm2, mm3, 0x55
- pshufw mm3, mm3, 0x00
- pshufw mm4, mm7, 0xff
- pshufw mm5, mm7, 0xaa
- pshufw mm6, mm7, 0x55
- pshufw mm7, mm7, 0x00
- - lea r1, [r0+r3*2]
- - lea r2, [r1+r3*2]
- movq [r0+r3*1], mm0
- movq [r0+r3*2], mm1
- movq [r1+r3*1], mm2
- @@ -1141,6 +1141,98 @@ cglobal pred8x8l_horizontal_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_horizontal_up_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride: the row above the block (r1 = has_topleft, r3 = stride)
- + lea r2, [r0+r3*2] ; r2 -> block row 1; has_topright is never read, r2 is scratch
- + movq mm0, [r0+r3*1-8] ; 8 bytes left of row 0; byte 7 is l0
- + punpckhbw mm0, [r0+r3*0-8] ; interleave with the bytes left of the row above: top word = l0, topleft
- + movq mm1, [r2+r3*1-8] ; 8 bytes left of row 2
- + punpckhbw mm1, [r0+r3*2-8] ; interleave with row 1: top word = l2, l1
- + mov r2, r0 ; remember the row-above pointer
- + punpckhwd mm1, mm0 ; top dword = l2 l1 l0 topleft
- + lea r0, [r0+r3*4] ; r0 -> block row 3
- + movq mm2, [r0+r3*1-8] ; 8 bytes left of row 4
- + punpckhbw mm2, [r0+r3*0-8] ; interleave with row 3: top word = l4, l3
- + lea r0, [r0+r3*2] ; r0 -> block row 5
- + movq mm3, [r0+r3*1-8] ; 8 bytes left of row 6
- + punpckhbw mm3, [r0+r3*0-8] ; interleave with row 5: top word = l6, l5
- + punpckhwd mm3, mm2 ; top dword = l6 l5 l4 l3
- + punpckhdq mm3, mm1 ; mm3 = l6 l5 l4 l3 l2 l1 l0 topleft (byte 0 first)
- + lea r0, [r0+r3*2] ; r0 -> block row 7
- + movq mm0, [r0+r3*0-8] ; 8 bytes left of row 7; byte 7 is l7
- + movq mm1, [r2] ; top row, byte 0 is t0
- + mov r0, r2 ; r0 -> row above the block again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; presumably mm4 = l7 l6 .. l0 (PALIGNR_MMX comes from x86util.asm, not shown)
- + PALIGNR mm1, mm2, 1, mm2 ; presumably mm1 = l5 .. l0 topleft t0
- + test r1, r1 ; top_left
- + jz .fix_lt_1 ; top-left missing: patch mm1 before filtering
- +.do_left:
- + movq mm0, mm4 ; keep a copy: PRED4x4_LOWPASS overwrites its 2nd and 3rd operands
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = low-pass of mm3 with neighbours mm1 / mm4
- + movq mm4, mm0 ; restore mm4
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass over mm4; only its byte 0 is used below
- + psllq mm1, 56 ; move that byte to byte 7
- + PALIGNR mm7, mm1, 7, mm3 ; presumably mm7 = the 8 filtered left pixels
- + movq mm1, mm7 ; NOTE(review): repeated at .body, so this copy is redundant
- + jmp .body
- +.fix_lt_1:
- + movq mm5, mm3
- + pxor mm5, mm4 ; difference between mm3 and mm4
- + psrlq mm5, 56 ; keep only byte 7 of the difference ...
- + psllq mm5, 48 ; ... and move it to byte 6
- + pxor mm1, mm5 ; flip byte 6 of mm1 by that difference
- + jmp .do_left
- +.body:
- + lea r1, [r0+r3*2] ; r1 -> block row 1, base for the row 2/3 stores
- + movq mm1, mm7 ; l0 l1 l2 l3 l4 l5 l6 l7
- + pshufw mm0, mm1, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1
- + psllq mm1, 56 ; l7 .. .. .. .. .. .. ..
- + movq mm2, mm0
- + psllw mm0, 8
- + psrlw mm2, 8 ; the two shifts plus por swap the bytes inside each word
- + por mm2, mm0 ; l7 l6 l5 l4 l3 l2 l1 l0
- + movq mm3, mm2
- + movq mm4, mm2
- + movq mm5, mm2
- + psrlq mm2, 8
- + psrlq mm3, 16
- + lea r2, [r1+r3*2] ; r2 -> block row 3, base for the row 4/5 stores
- + por mm2, mm1 ; l7 l7 l6 l5 l4 l3 l2 l1
- + punpckhbw mm1, mm1 ; duplicate l7 into the top two bytes
- + por mm3, mm1 ; l7 l7 l7 l6 l5 l4 l3 l2
- + pavgb mm4, mm2 ; rounded average of each left pixel with the next one
- + PRED4x4_LOWPASS mm1, mm3, mm5, mm2, mm6 ; 3-tap filter of mm2 with neighbours mm3 / mm5
- + movq mm5, mm4
- + punpcklbw mm4, mm1 ; p4 p3 p2 p1
- + punpckhbw mm5, mm1 ; p8 p7 p6 p5
- + movq mm6, mm5
- + movq mm7, mm5
- + movq mm0, mm5
- + PALIGNR mm5, mm4, 2, mm1 ; rows 1..3: presumably the p8..p1 sequence advanced by 2, 4 and 6 bytes
- + pshufw mm1, mm6, 11111001b ; rows 5..7 are built from mm5's words, repeating its top word
- + PALIGNR mm6, mm4, 4, mm2
- + pshufw mm2, mm7, 11111110b
- + PALIGNR mm7, mm4, 6, mm3
- + pshufw mm3, mm0, 11111111b ; last row: top word of p8..p5 in every word
- + movq [r0+r3*1], mm4 ; row 0
- + movq [r0+r3*2], mm5 ; row 1
- + lea r0, [r2+r3*2] ; r0 -> block row 5, base for the row 6/7 stores
- + movq [r1+r3*1], mm6 ; row 2
- + movq [r1+r3*2], mm7 ; row 3
- + movq [r2+r3*1], mm0 ; row 4
- + movq [r2+r3*2], mm1 ; row 5
- + movq [r0+r3*1], mm2 ; row 6
- + movq [r0+r3*2], mm3 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index b5e6c02..4877919 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -62,6 +62,7 @@ void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -103,6 +104,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- + h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 3452703f64c49b467c6b6664b1b8ebe9d7a854cc Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 23:41:50 -0500
- Subject: [PATCH 07/15] pred8x8l_down_left_mmxext
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 91 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 94 insertions(+), 1 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index cbcbc42..f0d314e 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == HOR_UP_PRED) { STOP_TIMER("pred8x8l_horizontal_up"); }
- +if (dir == DIAG_DOWN_LEFT_PRED) { STOP_TIMER("pred8x8l_down_left"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 901823c..788cbc9 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1233,6 +1233,97 @@ cglobal pred8x8l_horizontal_up_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_down_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_down_left_sse2, 4,4
- + sub r0, r3 ; r0 = src - stride: the row above the block (r1 = has_topleft, r2 = has_topright, r3 = stride)
- + movq mm0, [r0-8] ; 8 bytes preceding the top row; byte 7 is the top-left pixel
- + movq mm3, [r0] ; t0..t7: the top row
- + movq mm1, [r0+8] ; 8 bytes following the top row (top-right)
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = topleft, t0..t6 (PALIGNR_MMX comes from x86util.asm, not shown)
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = t1..t7, first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2 ; top-left missing: patch the left-neighbour vector
- + test r2, r2 ; top_right
- + jz .fix_tr_1 ; top-right missing: patch the right-neighbour vector
- +.do_top:
- + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = low-pass filtered top row
- + movq mm7, mm4 ; mm7 keeps the filtered top row for .body
- + test r2, r2 ; top_right
- + jz .fix_tr_2 ; no top-right: replicate the last top pixel instead
- + movq mm0, [r0+8] ; reload the 8 top-right bytes
- + movq mm5, mm0
- + movq mm2, mm0
- + movq mm4, mm0
- + psrlq mm5, 56 ; byte 0 = last top-right byte
- + PALIGNR mm2, mm3, 7, mm3 ; presumably mm2 = t7 followed by the first 7 top-right bytes
- + PALIGNR mm5, mm4, 1, mm4 ; presumably mm5 = top-right bytes 1..7 with the last one repeated
- + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = low-pass filtered top-right row
- + jmp .do_topright
- +.fix_tr_2:
- + punpckhbw mm3, mm3 ; duplicate the upper top-row bytes ...
- + pshufw mm1, mm3, 0xFF ; ... and broadcast the top word: mm1 = t7 in every byte
- +.do_topright:
- + movq mm6, mm1 ; mm6 keeps the (filtered or replicated) top-right row for .body
- + psrlq mm1, 56
- + movq mm5, mm1 ; mm5 byte 0 = last byte of mm6
- + jmp .body
- +.fix_lt_2:
- + movq mm5, mm3
- + pxor mm5, mm2 ; difference between top row and left-neighbour vector
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of the difference
- + pxor mm2, mm5 ; byte 0 of mm2 now equals byte 0 of mm3 (t0)
- + test r2, r2 ; top_right
- + jnz .do_top ; falls through to .fix_tr_1 when top-right is missing too
- +.fix_tr_1:
- + movq mm5, mm3
- + pxor mm5, mm1 ; difference between top row and right-neighbour vector
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of the difference
- + pxor mm1, mm5 ; byte 7 of mm1 now equals byte 7 of mm3 (t7)
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 -> block row 1, base for the row 2/3 stores
- + movq2dq xmm3, mm7 ; low qword = top row kept in mm7
- + movq2dq xmm4, mm6
- + pslldq xmm4, 8 ; top-right row kept in mm6 moved to the high qword
- + por xmm3, xmm4 ; xmm3 = 16-byte edge: top row then top-right row
- + movdqa xmm2, xmm3
- + psrldq xmm2, 1 ; edge shifted down one byte (next-pixel neighbours)
- + movq2dq xmm5, mm5
- + pslldq xmm5, 15 ; byte 0 of mm5 moved to byte 15
- + por xmm2, xmm5 ; fill the vacated top byte with it
- + lea r2, [r1+r3*2] ; r2 -> block row 3, base for the row 4/5 stores
- + movdqa xmm1, xmm3
- + pslldq xmm1, 1 ; edge shifted up one byte (previous-pixel neighbours)
- +INIT_XMM
- + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 ; xmm0 = 3-tap filter over the 16-byte edge
- + psrldq xmm0, 1 ; each row below is the filtered edge advanced by one more byte
- + movq [r0+r3*1], xmm0 ; row 0
- + psrldq xmm0, 1
- + movq [r0+r3*2], xmm0 ; row 1
- + psrldq xmm0, 1
- + lea r0, [r2+r3*2] ; r0 -> block row 5, base for the row 6/7 stores
- + movq [r1+r3*1], xmm0 ; row 2
- + psrldq xmm0, 1
- + movq [r1+r3*2], xmm0 ; row 3
- + psrldq xmm0, 1
- + movq [r2+r3*1], xmm0 ; row 4
- + psrldq xmm0, 1
- + movq [r2+r3*2], xmm0 ; row 5
- + psrldq xmm0, 1
- + movq [r0+r3*1], xmm0 ; row 6
- + psrldq xmm0, 1
- + movq [r0+r3*2], xmm0 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 4877919..4bf9f78 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -63,6 +63,7 @@ void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -134,6 +135,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- + h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- --
- 1.7.2.2
- From 66b03d80b08ace9f0d99b12c79cc111a6950c166 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sun, 26 Dec 2010 18:07:58 -0500
- Subject: [PATCH 08/15] pred8x8l_vertical_right_sse2
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 117 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 120 insertions(+), 1 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index f0d314e..8353b51 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == DIAG_DOWN_LEFT_PRED) { STOP_TIMER("pred8x8l_down_left"); }
- +if (dir == VERT_RIGHT_PRED) { STOP_TIMER("pred8x8l_vertical_right"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 788cbc9..1327ecb 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -25,6 +25,7 @@
- SECTION_RODATA
- tm_shuf: times 8 db 0x03, 0x80
- +pw_ff00: times 8 dw 0xff00
- plane_shuf: db -8, -7, -6, -5, -4, -3, -2, -1
- db 1, 2, 3, 4, 5, 6, 7, 8
- plane8_shuf: db -4, -3, -2, -1, 0, 0, 0, 0
- @@ -1233,6 +1234,122 @@ cglobal pred8x8l_horizontal_up_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_vertical_right_sse2, 4,5,7 ; r0=src, r1=has_topleft, r2=has_topright, r3=stride, r4=scratch
- + sub r0, r3 ; r0 = src - stride (row above the block)
- + lea r4, [r0+r3*2]
- + movq mm0, [r0+r3*1-8] ; each load ends at the byte just left of a row (x = -1)
- + punpckhbw mm0, [r0+r3*0-8] ; the punpckh* chain packs those x = -1 bytes into mm3
- + movq mm1, [r4+r3*1-8]
- + punpckhbw mm1, [r0+r3*2-8]
- + mov r4, r0 ; keep src - stride; r0 is advanced below
- + punpckhwd mm1, mm0
- + lea r0, [r0+r3*4]
- + movq mm2, [r0+r3*1-8]
- + punpckhbw mm2, [r0+r3*0-8]
- + lea r0, [r0+r3*2]
- + movq mm3, [r0+r3*1-8]
- + punpckhbw mm3, [r0+r3*0-8]
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = packed left-neighbour column
- + lea r0, [r0+r3*2]
- + movq mm0, [r0+r3*0-8]
- + movq mm1, [r4] ; first 8 bytes of the row above
- + mov r0, r4 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = mm3 shifted up one byte, low byte from mm0
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = mm3 shifted down one byte, high byte from the top row
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; NOTE(review): mm0 is reloaded below before being read
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; 3-tap lowpass of the left column
- + movq mm7, mm2 ; mm7 = filtered left column, consumed in .body
- + movq mm0, [r0-8]
- + movq mm3, [r0] ; mm3 = the 8 pixels above the block
- + movq mm1, [r0+8]
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top row shifted by one, low byte = [r0-1]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top row shifted by one, high byte = [r0+8]
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = filtered top row, consumed in .body
- + jmp .body
- +.fix_lt_1: ; reached only from the first top_left test; must not be a fall-through target
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56
- + psllq mm5, 48
- + pxor mm1, mm5 ; xor byte 7 of (mm3^mm4) into byte 6 of mm1
- + jmp .do_left
- +.fix_lt_2: ; no top-left: low byte of mm2 := low byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56
- + pxor mm2, mm5
- + test r2, r2 ; top_right
- + jnz .do_top ; otherwise fall through into .fix_tr_1
- +.fix_tr_1: ; no top-right: high byte of mm1 := high byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56
- + pxor mm1, mm5
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride (r1/r2 are free once the flags were tested)
- + movq2dq xmm0, mm7
- + movq2dq xmm4, mm6
- + pslldq xmm4, 8
- + por xmm0, xmm4 ; xmm0 = filtered left (low 8 bytes) | filtered top (high 8 bytes)
- + movdqa xmm6, [pw_ff00]
- + movdqa xmm1, xmm0
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + movdqa xmm2, xmm0
- + movdqa xmm3, xmm0
- + pslldq xmm0, 1
- + pslldq xmm1, 2
- + pavgb xmm2, xmm0 ; 2-tap averages of adjacent edge bytes
- +INIT_XMM
- + PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5 ; 3-tap lowpass of the edge vector
- + pandn xmm6, xmm4 ; keep the low byte of every word of xmm4
- + movdqa xmm5, xmm4
- + psrlw xmm4, 8 ; high byte of every word
- + packuswb xmm6, xmm4 ; xmm6 = even bytes | odd bytes of the lowpass result
- + movhlps xmm4, xmm6
- + movhps [r0+r3*2], xmm5 ; row 1
- + movhps [r0+r3*1], xmm2 ; row 0
- + psrldq xmm5, 4
- + movss xmm5, xmm6
- + psrldq xmm2, 4
- + movss xmm2, xmm4
- + lea r0, [r2+r3*2] ; r0 = src + 5*stride
- +
- + psrldq xmm5, 1
- + psrldq xmm2, 1
- + movq [r0+r3*2], xmm5 ; row 7
- + movq [r0+r3*1], xmm2 ; row 6
- + psrldq xmm5, 1
- + psrldq xmm2, 1
- + movq [r2+r3*2], xmm5 ; row 5
- + movq [r2+r3*1], xmm2 ; row 4
- + psrldq xmm5, 1
- + psrldq xmm2, 1
- + movq [r1+r3*2], xmm5 ; row 3
- + movq [r1+r3*1], xmm2 ; row 2
- + RET
- +
- +;-----------------------------------------------------------------------------
- ;void pred8x8l_down_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 4bf9f78..1c5dcc5 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -64,6 +64,7 @@ void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -136,6 +137,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- + h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- --
- 1.7.2.2
- From 0b2f1deb8b4e660ee5fa0bf47436a2a606ac3795 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sun, 26 Dec 2010 18:40:47 -0500
- Subject: [PATCH 09/15] pred8x8l_vertical_left_sse2
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 85 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 88 insertions(+), 1 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index 8353b51..5a1fdb9 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == VERT_RIGHT_PRED) { STOP_TIMER("pred8x8l_vertical_right"); }
- +if (dir == VERT_LEFT_PRED) { STOP_TIMER("pred8x8l_vertical_left"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 1327ecb..512db1b 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1350,6 +1350,91 @@ INIT_XMM
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_vertical_left_sse2, 4,4 ; r0=src, r1=has_topleft, r2=has_topright, r3=stride
- + sub r0, r3 ; r0 = src - stride (row above the block)
- + movq mm0, [r0-8]
- + movq mm3, [r0] ; mm3 = the 8 pixels above the block
- + movq mm1, [r0+8]
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top row shifted by one, low byte = [r0-1]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top row shifted by one, high byte = [r0+8]
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = 3-tap lowpass of the top row
- + movq2dq xmm4, mm4 ; xmm4 (low half) = filtered top row
- + test r2, r2 ; top_right
- + jz .fix_tr_2
- + movq mm0, [r0+8] ; the 8 pixels above-right of the block
- + movq mm5, mm0
- + movq mm2, mm0
- + movq mm4, mm0
- + psrlq mm5, 56 ; last above-right pixel, used as its own right neighbour
- + PALIGNR mm2, mm3, 7, mm3
- + PALIGNR mm5, mm4, 1, mm4
- + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = lowpass of the above-right row
- + jmp .do_topright
- +.fix_tr_2: ; no top-right: replicate the last top pixel into all 8 bytes
- + punpckhbw mm3, mm3
- + pshufw mm1, mm3, 0xFF
- +.do_topright:
- + movq2dq xmm3, mm1
- + jmp .body
- +.fix_lt_2: ; no top-left: low byte of mm2 := low byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56
- + pxor mm2, mm5
- + test r2, r2 ; top_right
- + jnz .do_top ; otherwise fall through into .fix_tr_1
- +.fix_tr_1: ; no top-right: high byte of mm1 := high byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56
- + pxor mm1, mm5
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride (r1/r2 are free once the flags were tested)
- + pslldq xmm3, 8
- + por xmm4, xmm3 ; xmm4 = top row (low 8 bytes) | above-right row (high 8 bytes)
- + movdqa xmm2, xmm4
- + movdqa xmm1, xmm4
- + movdqa xmm3, xmm4
- + psrldq xmm2, 1
- + pslldq xmm1, 1
- + pavgb xmm3, xmm2 ; xmm3 = 2-tap average of adjacent edge bytes
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- +INIT_XMM
- + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm4, xmm5 ; xmm0 = 3-tap lowpass of the edge vector
- + psrldq xmm0, 1
- + movq [r0+r3*1], xmm3 ; row 0
- + movq [r0+r3*2], xmm0 ; row 1
- + lea r0, [r2+r3*2] ; r0 = src + 5*stride
- + psrldq xmm3, 1 ; each following row pair is shifted by one more byte
- + psrldq xmm0, 1
- + movq [r1+r3*1], xmm3 ; row 2
- + movq [r1+r3*2], xmm0 ; row 3
- + psrldq xmm3, 1
- + psrldq xmm0, 1
- + movq [r2+r3*1], xmm3 ; row 4
- + movq [r2+r3*2], xmm0 ; row 5
- + psrldq xmm3, 1
- + psrldq xmm0, 1
- + movq [r0+r3*1], xmm3 ; row 6
- + movq [r0+r3*2], xmm0 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ;void pred8x8l_down_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 1c5dcc5..ebba657 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -65,6 +65,7 @@ void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -138,6 +139,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- + h->pred8x8l[VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- --
- 1.7.2.2
- From 5728a3079448b86a9d2778c2b8bdac528f1e3bbe Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sun, 26 Dec 2010 20:32:25 -0500
- Subject: [PATCH 10/15] pred8x8l_down_right_sse2
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 117 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 120 insertions(+), 1 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index 5a1fdb9..086e071 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == VERT_LEFT_PRED) { STOP_TIMER("pred8x8l_vertical_left"); }
- +if (dir == DIAG_DOWN_RIGHT_PRED) { STOP_TIMER("pred8x8l_down_right"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 512db1b..9132045 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1435,6 +1435,123 @@ INIT_XMM
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_down_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_down_right_sse2, 4,5 ; r0=src, r1=has_topleft, r2=has_topright, r3=stride, r4=scratch
- + sub r0, r3 ; r0 = src - stride (row above the block)
- + lea r4, [r0+r3*2]
- + movq mm0, [r0+r3*1-8] ; each load ends at the byte just left of a row (x = -1)
- + punpckhbw mm0, [r0+r3*0-8] ; the punpckh* chain packs those x = -1 bytes into mm3
- + movq mm1, [r4+r3*1-8]
- + punpckhbw mm1, [r0+r3*2-8]
- + mov r4, r0 ; keep src - stride; r0 is advanced below
- + punpckhwd mm1, mm0
- + lea r0, [r0+r3*4]
- + movq mm2, [r0+r3*1-8]
- + punpckhbw mm2, [r0+r3*0-8]
- + lea r0, [r0+r3*2]
- + movq mm3, [r0+r3*1-8]
- + punpckhbw mm3, [r0+r3*0-8]
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = packed left-neighbour column
- + lea r0, [r0+r3*2]
- + movq mm0, [r0+r3*0-8]
- + movq mm1, [r4] ; first 8 bytes of the row above
- + mov r0, r4 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = mm3 shifted up one byte, low byte from mm0
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = mm3 shifted down one byte, high byte from the top row
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; save mm4: the lowpass below overwrites it
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = 3-tap lowpass of the left column
- + movq mm4, mm0
- + movq mm7, mm2
- + movq mm6, mm2 ; mm6 = filtered left column, consumed in .body
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second lowpass; only its low byte is kept
- + psllq mm1, 56
- + PALIGNR mm7, mm1, 7, mm3 ; mm7 = mm2 shifted up one byte, low byte from mm1
- + movq mm0, [r0-8]
- + movq mm3, [r0] ; mm3 = the 8 pixels above the block
- + movq mm1, [r0+8]
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top row shifted by one, low byte = [r0-1]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top row shifted by one, high byte = [r0+8]
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = 3-tap lowpass of the top row
- + movq mm5, mm4 ; mm5 = filtered top row, consumed in .body
- + jmp .body
- +.fix_lt_1: ; reached only from the first top_left test
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56
- + psllq mm5, 48
- + pxor mm1, mm5 ; xor byte 7 of (mm3^mm4) into byte 6 of mm1
- + jmp .do_left
- +.fix_lt_2: ; no top-left: low byte of mm2 := low byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56
- + pxor mm2, mm5
- + test r2, r2 ; top_right
- + jnz .do_top ; otherwise fall through into .fix_tr_1
- +.fix_tr_1: ; no top-right: high byte of mm1 := high byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56
- + pxor mm1, mm5
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride (r1/r2 are free once the flags were tested)
- + movq2dq xmm1, mm7
- + movq2dq xmm3, mm6
- + movq2dq xmm4, mm5
- + movdqa xmm0, xmm3
- + pslldq xmm4, 8
- + por xmm3, xmm4 ; xmm3 = filtered left (low 8 bytes) | filtered top (high 8 bytes)
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + pslldq xmm4, 1
- + psrldq xmm0, 7
- + pslldq xmm0, 15
- + psrldq xmm0, 7 ; byte 7 of the filtered left column moved to byte 8
- + por xmm1, xmm0
- + por xmm1, xmm4 ; xmm1 = edge vector shifted up by one byte
- + lea r0, [r2+r3*2] ; r0 = src + 5*stride
- + movdqa xmm2, xmm3
- + psrldq xmm2, 1 ; xmm2 = edge vector shifted down by one byte
- +INIT_XMM
- + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 ; xmm0 = 3-tap lowpass of the edge vector
- + movdqa xmm1, xmm0
- + psrldq xmm1, 1
- + movq [r0+r3*2], xmm0 ; row 7
- + movq [r0+r3*1], xmm1 ; row 6
- + psrldq xmm0, 2
- + psrldq xmm1, 2
- + movq [r2+r3*2], xmm0 ; row 5
- + movq [r2+r3*1], xmm1 ; row 4
- + psrldq xmm0, 2
- + psrldq xmm1, 2
- + movq [r1+r3*2], xmm0 ; row 3
- + movq [r1+r3*1], xmm1 ; row 2
- + psrldq xmm0, 2
- + psrldq xmm1, 2
- + movq [r4+r3*2], xmm0 ; row 1 (r4 still holds src - stride)
- + movq [r4+r3*1], xmm1 ; row 0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ;void pred8x8l_down_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index ebba657..265735d 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -64,6 +64,7 @@ void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_down_right_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- @@ -138,6 +139,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- + h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
- h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- h->pred8x8l[VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2;
- if (codec_id == CODEC_ID_VP8) {
- --
- 1.7.2.2
- From 77f1d0de4b7aef01fd2c7697c0b8a312366a4d7d Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sun, 26 Dec 2010 22:46:24 -0500
- Subject: [PATCH 11/15] pred8x8l_horizontal_down_sse2
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 130 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 133 insertions(+), 1 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index 086e071..d80324c 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == DIAG_DOWN_RIGHT_PRED) { STOP_TIMER("pred8x8l_down_right"); }
- +if (dir == HOR_DOWN_PRED) { STOP_TIMER("pred8x8l_horizontal_down"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 9132045..3b04ebf 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1142,6 +1142,136 @@ cglobal pred8x8l_horizontal_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_horizontal_down_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +cglobal pred8x8l_horizontal_down_sse2, 4,5 ; NOTE(review): no INIT_MMX / %define PALIGNR here; presumably inherited from the preceding function - confirm
- + sub r0, r3 ; r0 = src - stride (r0=src, r3=stride)
- + lea r4, [r0+r3*2]
- + movq mm0, [r0+r3*1-8] ; each load ends at the byte just left of a row (x = -1)
- + punpckhbw mm0, [r0+r3*0-8] ; the punpckh* chain packs those x = -1 bytes into mm3
- + movq mm1, [r4+r3*1-8]
- + punpckhbw mm1, [r0+r3*2-8]
- + mov r4, r0 ; keep src - stride; r0 is advanced below
- + punpckhwd mm1, mm0
- + lea r0, [r0+r3*4]
- + movq mm2, [r0+r3*1-8]
- + punpckhbw mm2, [r0+r3*0-8]
- + lea r0, [r0+r3*2]
- + movq mm3, [r0+r3*1-8]
- + punpckhbw mm3, [r0+r3*0-8]
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = packed left-neighbour column
- + lea r0, [r0+r3*2]
- + movq mm0, [r0+r3*0-8]
- + movq mm1, [r4] ; first 8 bytes of the row above
- + mov r0, r4 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = mm3 shifted up one byte, low byte from mm0
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = mm3 shifted down one byte, high byte from the top row
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; save mm4: the lowpass below overwrites it
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = 3-tap lowpass of the left column
- + movq2dq xmm0, mm2
- + pslldq xmm0, 8 ; filtered left column goes to the high half of xmm0
- + movq mm4, mm0
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second lowpass; only its low byte is kept
- + movq2dq xmm2, mm1
- + pslldq xmm2, 15
- + psrldq xmm2, 8 ; low byte of mm1 moved to byte 7
- + por xmm0, xmm2
- +.check_top:
- + movq mm0, [r0-8]
- + movq mm3, [r0] ; mm3 = the 8 pixels above the block
- + movq mm1, [r0+8]
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top row shifted by one, low byte = [r0-1]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top row shifted by one, high byte = [r0+8]
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = 3-tap lowpass of the top row
- + movq2dq xmm1, mm4
- + test r2, r2 ; top_right
- + jz .fix_tr_2
- + movq mm0, [r0+8] ; the 8 pixels above-right of the block
- + movq mm5, mm0
- + movq mm2, mm0
- + movq mm4, mm0
- + psrlq mm5, 56 ; last above-right pixel, used as its own right neighbour
- + PALIGNR mm2, mm3, 7, mm3
- + PALIGNR mm5, mm4, 1, mm4
- + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = lowpass of the above-right row
- + jmp .do_topright
- +.fix_tr_2: ; no top-right: replicate the last top pixel into all 8 bytes
- + punpckhbw mm3, mm3
- + pshufw mm1, mm3, 0xFF
- +.do_topright:
- + movq2dq xmm5, mm1
- + pslldq xmm5, 8
- + por xmm1, xmm5 ; xmm1 = top row (low 8 bytes) | above-right row (high 8 bytes)
- + jmp .body
- +.fix_lt_1: ; reached only from the first top_left test
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56
- + psllq mm5, 48
- + pxor mm1, mm5 ; xor byte 7 of (mm3^mm4) into byte 6 of mm1
- + jmp .do_left
- +.fix_lt_2: ; no top-left: low byte of mm2 := low byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56
- + pxor mm2, mm5
- + test r2, r2 ; top_right
- + jnz .do_top ; otherwise fall through into .fix_tr_1
- +.fix_tr_1: ; no top-right: high byte of mm1 := high byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56
- + pxor mm1, mm5
- + jmp .do_top
- +.body:
- +INIT_XMM
- + lea r2, [r4+r3*2] ; r2 = src + stride (r4 = src - stride)
- + movdqa xmm2, xmm1
- + movdqa xmm3, xmm1
- + PALIGNR xmm1, xmm0, 7, xmm4 ; three views of the xmm1:xmm0 pair, offset by 7, 9 and 8 bytes
- + PALIGNR xmm2, xmm0, 9, xmm5
- + lea r1, [r2+r3*2] ; r1 = src + 3*stride
- + PALIGNR xmm3, xmm0, 8, xmm0
- + movdqa xmm4, xmm1
- + pavgb xmm4, xmm3 ; xmm4 = 2-tap averages
- + lea r0, [r1+r3*2] ; r0 = src + 5*stride
- + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm5 ; xmm0 = 3-tap lowpass
- + punpcklbw xmm4, xmm0 ; interleave the 2-tap and 3-tap results
- + movhlps xmm0, xmm4
- +
- + movq [r0+r3*2], xmm4 ; row 7
- + movq [r2+r3*2], xmm0 ; row 3
- + psrldq xmm4, 2
- + psrldq xmm0, 2
- + movq [r0+r3*1], xmm4 ; row 6
- + movq [r2+r3*1], xmm0 ; row 2
- + psrldq xmm4, 2
- + psrldq xmm0, 2
- + movq [r1+r3*2], xmm4 ; row 5
- + movq [r4+r3*2], xmm0 ; row 1
- + psrldq xmm4, 2
- + psrldq xmm0, 2
- + movq [r1+r3*1], xmm4 ; row 4
- + movq [r4+r3*1], xmm0 ; row 0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- INIT_MMX
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 265735d..b4b3a4e 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -67,6 +67,7 @@ void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_toprigh
- void ff_pred8x8l_down_right_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_down_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -142,6 +143,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
- h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- h->pred8x8l[VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2;
- + h->pred8x8l[HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- --
- 1.7.2.2
- From 38136e3c0554e1fb20b4b39861597ab3543c3a1b Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 00:26:06 -0500
- Subject: [PATCH 12/15] pred8x8_dc_mmxext
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 62 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 5 ++-
- 3 files changed, 67 insertions(+), 2 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index d80324c..ff4dba1 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == HOR_DOWN_PRED) { STOP_TIMER("pred8x8l_horizontal_down"); }
- +if (dir == DC_PRED8x8) { STOP_TIMER("pred8x8_dc"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 3b04ebf..5f29b18 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1773,6 +1773,68 @@ INIT_XMM
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8_dc_mmxext(uint8_t *src, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +cglobal pred8x8_dc_mmxext, 2,5 ; r0=src, r1=stride; r2-r4 scratch
- + sub r0, r1 ; r0 = src - stride (row above the block)
- + pxor m7, m7 ; m7 = 0 for psadbw / pavgw
- + movd m0, [r0+0] ; top pixels 0..3
- + movd m1, [r0+4] ; top pixels 4..7
- + psadbw m0, m7 ; s0
- + mov r4, r0 ; keep src - stride; r0 walks down the left column
- + psadbw m1, m7 ; s1
- +
- + movzx r2d, byte [r0+r1*1-1] ; left neighbour of row 0
- + movzx r3d, byte [r0+r1*2-1] ; left neighbour of row 1
- + lea r0, [r0+r1*2]
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*1-1] ; row 2
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*2-1] ; row 3
- + add r2d, r3d
- + lea r0, [r0+r1*2]
- + movd m2, r2d ; s2
- +
- + movzx r2d, byte [r0+r1*1-1] ; left neighbour of row 4
- + movzx r3d, byte [r0+r1*2-1] ; row 5
- + lea r0, [r0+r1*2]
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*1-1] ; row 6
- + add r2d, r3d
- + movzx r3d, byte [r0+r1*2-1] ; row 7
- + add r2d, r3d
- + movd m3, r2d ; s3
- +
- + punpcklwd m0, m1
- + mov r0, r4 ; r0 = src - stride again
- + punpcklwd m2, m3
- + punpckldq m0, m2 ; s0, s1, s2, s3
- + pshufw m3, m0, 11110110b ; s2, s1, s3, s3
- + lea r2, [r0+r1*2] ; r2 = src + stride
- + pshufw m0, m0, 01110100b ; s0, s1, s3, s1
- + paddw m0, m3 ; s0+s2, 2*s1, 2*s3, s1+s3
- + lea r3, [r2+r1*2] ; r3 = src + 3*stride
- + psrlw m0, 2
- + pavgw m0, m7 ; s0+s2, s1, s3, s1+s3
- + lea r4, [r3+r1*2] ; r4 = src + 5*stride
- + packuswb m0, m0 ; four DC values as bytes
- + punpcklbw m0, m0 ; each DC byte doubled
- + movq m1, m0
- + punpcklbw m0, m0 ; first two DC values x4 each (rows 0-3)
- + punpckhbw m1, m1 ; last two DC values x4 each (rows 4-7)
- + movq [r0+r1*1], m0 ; row 0
- + movq [r0+r1*2], m0 ; row 1
- + movq [r2+r1*1], m0 ; row 2
- + movq [r2+r1*2], m0 ; row 3
- + movq [r3+r1*1], m1 ; row 4
- + movq [r3+r1*2], m1 ; row 5
- + movq [r4+r1*1], m1 ; row 6
- + movq [r4+r1*2], m1 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index b4b3a4e..984f719 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -45,6 +45,7 @@ void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
- +void ff_pred8x8_dc_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride);
- @@ -113,8 +114,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- - if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264)
- + if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
- + h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_mmxext;
- h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
- + }
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
- h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
- --
- 1.7.2.2
- From 49cdad992bd4dbcf91311aeba6c9894fc90c60a9 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 12:39:22 -0500
- Subject: [PATCH 13/15] pred8x8l_vertical_right_mmxext
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 118 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 4 +-
- 3 files changed, 122 insertions(+), 2 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index ff4dba1..60cd944 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == DC_PRED8x8) { STOP_TIMER("pred8x8_dc"); }
- +if (dir == VERT_RIGHT_PRED) { STOP_TIMER("pred8x8l_vertical_right_mmxext"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 5f29b18..51b8edf 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1364,6 +1364,124 @@ cglobal pred8x8l_horizontal_up_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_vertical_right_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_vertical_right_mmxext, 4,5 ; r0=src, r1=has_topleft, r2=has_topright, r3=stride, r4=scratch
- + sub r0, r3 ; r0 = src - stride (row above the block)
- + lea r4, [r0+r3*2]
- + movq mm0, [r0+r3*1-8] ; each load ends at the byte just left of a row (x = -1)
- + punpckhbw mm0, [r0+r3*0-8] ; the punpckh* chain packs those x = -1 bytes into mm3
- + movq mm1, [r4+r3*1-8]
- + punpckhbw mm1, [r0+r3*2-8]
- + mov r4, r0 ; keep src - stride; r0 is advanced below
- + punpckhwd mm1, mm0
- + lea r0, [r0+r3*4]
- + movq mm2, [r0+r3*1-8]
- + punpckhbw mm2, [r0+r3*0-8]
- + lea r0, [r0+r3*2]
- + movq mm3, [r0+r3*1-8]
- + punpckhbw mm3, [r0+r3*0-8]
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = packed left-neighbour column
- + lea r0, [r0+r3*2]
- + movq mm0, [r0+r3*0-8]
- + movq mm1, [r4] ; first 8 bytes of the row above
- + mov r0, r4 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = mm3 shifted up one byte, low byte from mm0
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = mm3 shifted down one byte, high byte from the top row
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; NOTE(review): mm0 is reloaded below before being read
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; 3-tap lowpass of the left column
- + movq mm7, mm2 ; mm7 = filtered left column, consumed in .body
- + movq mm0, [r0-8]
- + movq mm3, [r0] ; mm3 = the 8 pixels above the block
- + movq mm1, [r0+8]
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top row shifted by one, low byte = [r0-1]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top row shifted by one, high byte = [r0+8]
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = filtered top row, consumed in .body
- + jmp .body
- +.fix_lt_1: ; reached only from the first top_left test; must not be a fall-through target
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56
- + psllq mm5, 48
- + pxor mm1, mm5 ; xor byte 7 of (mm3^mm4) into byte 6 of mm1
- + jmp .do_left
- +.fix_lt_2: ; no top-left: low byte of mm2 := low byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56
- + pxor mm2, mm5
- + test r2, r2 ; top_right
- + jnz .do_top ; otherwise fall through into .fix_tr_1
- +.fix_tr_1: ; no top-right: high byte of mm1 := high byte of mm3
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56
- + pxor mm1, mm5
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride (r1/r2 are free once the flags were tested)
- + movq mm2, mm6
- + movq mm4, mm7
- + movq mm3, mm6
- + PALIGNR mm3, mm4, 7, mm0 ; top row shifted by one, low byte = last left-column byte
- + movq mm1, mm6
- + PALIGNR mm1, mm4, 6, mm0 ; top row shifted by two, low bytes from the left column
- + movq mm4, mm3
- + pavgb mm3, mm2 ; mm3 = 2-tap averages
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + PRED4x4_LOWPASS mm0, mm1, mm2, mm4, mm5 ; mm0 = 3-tap lowpass
- + movq [r0+r3*1], mm3 ; row 0
- + movq [r0+r3*2], mm0 ; row 1
- + movq mm5, mm0 ; mm5 / mm6 carry the odd / even row patterns forward
- + movq mm6, mm3
- + movq mm1, mm7
- + movq mm2, mm1
- + psllq mm2, 8
- + movq mm3, mm1
- + psllq mm3, 16
- + lea r4, [r2+r3*2] ; r4 = src + 5*stride
- + PRED4x4_LOWPASS mm0, mm1, mm3, mm2, mm4 ; lowpass of the left column; tmp is mm4 because mm5/mm6 are live
- +
- + PALIGNR mm6, mm0, 7, mm2 ; shift in the top byte of mm0 as the new leftmost pixel
- + movq [r1+r3*1], mm6 ; row 2
- + psllq mm0, 8 ; expose the next left-column byte
- + PALIGNR mm5, mm0, 7, mm1
- + movq [r1+r3*2], mm5 ; row 3
- + psllq mm0, 8
- +
- + PALIGNR mm6, mm0, 7, mm2
- + movq [r2+r3*1], mm6 ; row 4
- + psllq mm0, 8
- + PALIGNR mm5, mm0, 7, mm1
- + movq [r2+r3*2], mm5 ; row 5
- + psllq mm0, 8
- +
- + PALIGNR mm6, mm0, 7, mm2
- + movq [r4+r3*1], mm6 ; row 6
- + psllq mm0, 8
- + PALIGNR mm5, mm0, 7, mm1
- + movq [r4+r3*2], mm5 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 984f719..821cb4b 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -66,6 +66,7 @@ void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_right_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_vertical_right_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_down_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- @@ -111,6 +112,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext;
- + h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- @@ -144,7 +146,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
- - h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- + //h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- h->pred8x8l[VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2;
- h->pred8x8l[HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2;
- if (codec_id == CODEC_ID_VP8) {
- --
- 1.7.2.2
- From b9e9a0bc398f2e1495b37125862b9d70f1c46b08 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 13:36:04 -0500
- Subject: [PATCH 14/15] pred8x8l_down_right_mmxext
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 137 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 4 +-
- 3 files changed, 141 insertions(+), 2 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index 60cd944..e8e140d 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == VERT_RIGHT_PRED) { STOP_TIMER("pred8x8l_vertical_right_mmxext"); }
- +if (dir == DIAG_DOWN_RIGHT_PRED) { STOP_TIMER("pred8x8l_down_right_mmxext"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 51b8edf..6273627 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1683,6 +1683,143 @@ INIT_XMM
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_down_right_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +; r0=src, r1=has_topleft, r2=has_topright, r3=stride, r4=scratch. Low-pass filters the left column, top-left and top-row neighbours, filters the joined edge again and stores rows 7..0. Byte-lane comments assume PALIGNR dst, src, n behaves like SSSE3 palignr ((dst:src) >> 8*n) - confirm against x86util.asm.
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_down_right_mmxext, 4,5
- + sub r0, r3 ; r0 = row above the block; NOTE(review): stride is declared int but r3 is used full-width - confirm its upper bits are valid on x86-64
- + lea r4, [r0+r3*2] ; r4 = src + stride (row 1)
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending just left of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; bytes 6,7 = left pixel of row 0, top-left pixel
- + movq mm1, [r4+r3*1-8] ; 8 bytes ending just left of row 2
- + punpckhbw mm1, [r0+r3*2-8] ; bytes 6,7 = left pixels of rows 2, 1
- + mov r4, r0 ; remember the row above the block
- + punpckhwd mm1, mm0 ; high dword = left pixels of rows 2,1,0 + top-left
- + lea r0, [r0+r3*4] ; r0 = row 3
- + movq mm2, [r0+r3*1-8] ; 8 bytes ending just left of row 4
- + punpckhbw mm2, [r0+r3*0-8] ; bytes 6,7 = left pixels of rows 4, 3
- + lea r0, [r0+r3*2] ; r0 = row 5
- + movq mm3, [r0+r3*1-8] ; 8 bytes ending just left of row 6
- + punpckhbw mm3, [r0+r3*0-8] ; bytes 6,7 = left pixels of rows 6, 5
- + punpckhwd mm3, mm2 ; high dword = left pixels of rows 6,5,4,3
- + punpckhdq mm3, mm1 ; mm3 = left pixels of rows 6..0 followed by top-left
- + lea r0, [r0+r3*2] ; r0 = row 7
- + movq mm0, [r0+r3*0-8] ; byte 7 = left pixel of row 7
- + movq mm1, [r4] ; the 8 pixels directly above the block
- + mov r0, r4 ; r0 = row above the block again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = left pixels of rows 7..0
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = left pixels of rows 5..0, top-left, first top pixel
- + test r1d, r1d ; top_left (int argument: test only the low 32 bits)
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; the macro below clobbers mm4, keep a copy
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = (mm1 + 2*mm3 + mm4 + 2) >> 2 per byte
- + movq mm4, mm0
- + movq mm7, mm2
- + movq mm6, mm2 ; mm6 = filtered left column (rows 6..0) + top-left
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; only byte 0 (row 7's left pixel) of this result is used
- + psllq mm1, 56 ; move that byte to the top so PALIGNR can shift it in
- + PALIGNR mm7, mm1, 7, mm3 ; mm7 = filtered left pixels of rows 7..0
- + movq mm0, [r0-8] ; byte 7 = top-left pixel
- + movq mm3, [r0] ; top row
- + movq mm1, [r0+8] ; byte 0 = first top-right pixel
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top row shifted so each byte holds its left neighbour
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top row shifted so each byte holds its right neighbour
- + test r1d, r1d ; top_left
- + jz .fix_lt_2
- + test r2d, r2d ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = filtered top row
- + movq mm5, mm4 ; .body expects the filtered top row in mm5
- + jmp .body
- +.fix_lt_1: ; no top-left while filtering the left column
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56 ; isolate byte 7 of (mm3 ^ mm4)
- + psllq mm5, 48 ; ... and move it to byte 6
- + pxor mm1, mm5 ; byte 6 of mm1 (top-left slot) becomes byte 7 of mm4 (row 0's left pixel)
- + jmp .do_left
- +.fix_lt_2: ; no top-left while filtering the top row
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of (mm3 ^ mm2)
- + pxor mm2, mm5 ; byte 0 of mm2 becomes byte 0 of mm3 (first top pixel)
- + test r2d, r2d ; top_right
- + jnz .do_top
- +.fix_tr_1: ; no top-right
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of (mm3 ^ mm1)
- + pxor mm1, mm5 ; byte 7 of mm1 becomes byte 7 of mm3 (last top pixel)
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride (flag no longer needed)
- + movq mm1, mm7 ; filtered left pixels of rows 7..0
- + movq mm7, mm5 ; filtered top row
- + movq mm5, mm6 ; filtered left rows 6..0 + top-left
- + movq mm2, mm7
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + PALIGNR mm2, mm6, 1, mm0 ; left rows 5..0, top-left, first top pixel
- + movq mm3, mm7
- + PALIGNR mm3, mm6, 7, mm0 ; top-left followed by top pixels 0..6
- + movq mm4, mm7
- + lea r4, [r2+r3*2] ; r4 = src + 5*stride
- + psrlq mm4, 8 ; top pixels 1..7, zero in byte 7
- + PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6 ; mm0 = second filter pass over the left/top-left edge
- + PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6 ; mm1 = second filter pass over the top edge
- + movq [r4+r3*2], mm0 ; row 7
- + movq mm2, mm1
- + psrlq mm0, 8 ; each higher row drops the lowest byte ...
- + psllq mm2, 56
- + psrlq mm1, 8
- + por mm0, mm2 ; ... and takes the next byte of mm1 in at the top
- + movq [r4+r3*1], mm0 ; row 6
- + movq mm2, mm1
- + psrlq mm0, 8
- + psllq mm2, 56
- + psrlq mm1, 8
- + por mm0, mm2
- + movq [r2+r3*2], mm0 ; row 5
- + movq mm2, mm1
- + psrlq mm0, 8
- + psllq mm2, 56
- + psrlq mm1, 8
- + por mm0, mm2
- + movq [r2+r3*1], mm0 ; row 4
- + movq mm2, mm1
- + psrlq mm0, 8
- + psllq mm2, 56
- + psrlq mm1, 8
- + por mm0, mm2
- + movq [r1+r3*2], mm0 ; row 3
- + movq mm2, mm1
- + psrlq mm0, 8
- + psllq mm2, 56
- + psrlq mm1, 8
- + por mm0, mm2
- + movq [r1+r3*1], mm0 ; row 2
- + movq mm2, mm1
- + psrlq mm0, 8
- + psllq mm2, 56
- + psrlq mm1, 8
- + por mm0, mm2
- + movq [r0+r3*2], mm0 ; row 1
- + psrlq mm0, 8
- + psllq mm1, 56 ; last step: mm1 is not needed afterwards, shift it in place
- + por mm0, mm1
- + movq [r0+r3*1], mm0 ; row 0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_down_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 821cb4b..6c6cf30 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -66,6 +66,7 @@ void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_right_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_down_right_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_right_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- @@ -113,6 +114,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext;
- h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext;
- + h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- @@ -145,7 +147,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- - h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
- + //h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
- //h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- h->pred8x8l[VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2;
- h->pred8x8l[HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2;
- --
- 1.7.2.2
- From e3cb94f4dbb429a19abb4b581b5a0f48109dff14 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Mon, 27 Dec 2010 15:10:45 -0500
- Subject: [PATCH 15/15] pred8x8l_horizontal_down_mmxext
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 124 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 4 +-
- 3 files changed, 128 insertions(+), 2 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index e8e140d..94ae2b1 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == DIAG_DOWN_RIGHT_PRED) { STOP_TIMER("pred8x8l_down_right_mmxext"); }
- +if (dir == HOR_DOWN_PRED) { STOP_TIMER("pred8x8l_horizontal_down_mmxext"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 6273627..f638255 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1142,6 +1142,130 @@ cglobal pred8x8l_horizontal_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_horizontal_down_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +; r0=src, r1=has_topleft, r2=has_topright, r3=stride, r4=scratch. Low-pass filters the left column, top-left and top-row neighbours, then builds rows 7..0 from 2-tap averages interleaved with 3-tap filtered values. Byte-lane comments assume PALIGNR dst, src, n behaves like SSSE3 palignr ((dst:src) >> 8*n) - confirm against x86util.asm.
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_horizontal_down_mmxext, 4,5
- + sub r0, r3 ; r0 = row above the block; NOTE(review): stride is declared int but r3 is used full-width - confirm its upper bits are valid on x86-64
- + lea r4, [r0+r3*2] ; r4 = src + stride (row 1)
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending just left of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; bytes 6,7 = left pixel of row 0, top-left pixel
- + movq mm1, [r4+r3*1-8] ; 8 bytes ending just left of row 2
- + punpckhbw mm1, [r0+r3*2-8] ; bytes 6,7 = left pixels of rows 2, 1
- + mov r4, r0 ; remember the row above the block
- + punpckhwd mm1, mm0 ; high dword = left pixels of rows 2,1,0 + top-left
- + lea r0, [r0+r3*4] ; r0 = row 3
- + movq mm2, [r0+r3*1-8] ; 8 bytes ending just left of row 4
- + punpckhbw mm2, [r0+r3*0-8] ; bytes 6,7 = left pixels of rows 4, 3
- + lea r0, [r0+r3*2] ; r0 = row 5
- + movq mm3, [r0+r3*1-8] ; 8 bytes ending just left of row 6
- + punpckhbw mm3, [r0+r3*0-8] ; bytes 6,7 = left pixels of rows 6, 5
- + punpckhwd mm3, mm2 ; high dword = left pixels of rows 6,5,4,3
- + punpckhdq mm3, mm1 ; mm3 = left pixels of rows 6..0 followed by top-left
- + lea r0, [r0+r3*2] ; r0 = row 7
- + movq mm0, [r0+r3*0-8] ; byte 7 = left pixel of row 7
- + movq mm1, [r4] ; the 8 pixels directly above the block
- + mov r0, r4 ; r0 = row above the block again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = left pixels of rows 7..0
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = left pixels of rows 5..0, top-left, first top pixel
- + test r1d, r1d ; top_left (int argument: test only the low 32 bits)
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; the macro below clobbers mm4, keep a copy
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = (mm1 + 2*mm3 + mm4 + 2) >> 2 per byte
- + movq mm4, mm0
- + movq mm7, mm2
- + movq mm6, mm2 ; mm6 = filtered left column (rows 6..0) + top-left
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; only byte 0 (row 7's left pixel) of this result is used
- + psllq mm1, 56 ; move that byte to the top so PALIGNR can shift it in
- + PALIGNR mm7, mm1, 7, mm3 ; mm7 = filtered left pixels of rows 7..0
- + movq mm0, [r0-8] ; byte 7 = top-left pixel
- + movq mm3, [r0] ; top row
- + movq mm1, [r0+8] ; byte 0 = first top-right pixel
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top row shifted so each byte holds its left neighbour
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top row shifted so each byte holds its right neighbour
- + test r1d, r1d ; top_left
- + jz .fix_lt_2
- + test r2d, r2d ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = filtered top row
- + movq mm5, mm4 ; .body expects the filtered top row in mm5
- + jmp .body
- +.fix_lt_1: ; no top-left while filtering the left column
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56 ; isolate byte 7 of (mm3 ^ mm4)
- + psllq mm5, 48 ; ... and move it to byte 6
- + pxor mm1, mm5 ; byte 6 of mm1 (top-left slot) becomes byte 7 of mm4 (row 0's left pixel)
- + jmp .do_left
- +.fix_lt_2: ; no top-left while filtering the top row
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 of (mm3 ^ mm2)
- + pxor mm2, mm5 ; byte 0 of mm2 becomes byte 0 of mm3 (first top pixel)
- + test r2d, r2d ; top_right
- + jnz .do_top
- +.fix_tr_1: ; no top-right
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 of (mm3 ^ mm1)
- + pxor mm1, mm5 ; byte 7 of mm1 becomes byte 7 of mm3 (last top pixel)
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride (flag no longer needed)
- + movq mm0, mm7
- + psllq mm0, 56 ; keep only byte 0 of mm7 (filtered row 7 left pixel), in the top byte
- + movq mm1, mm6 ; filtered left rows 6..0 + top-left
- + movq mm2, mm5 ; filtered top row
- + movq mm3, mm1
- + movq mm4, mm2
- + PALIGNR mm2, mm1, 7, mm5 ; top-left followed by top pixels 0..6
- + PALIGNR mm1, mm0, 7, mm6 ; left pixels of rows 7..0
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + PALIGNR mm4, mm3, 1, mm7 ; left rows 5..0, top-left, first top pixel
- + movq mm5, mm3
- + pavgb mm3, mm1 ; rounded 2-tap average of vertically adjacent edge pixels
- + PRED4x4_LOWPASS mm0, mm4, mm1, mm5, mm7 ; mm0 = 3-tap filter over the left/top-left edge
- + movq mm4, mm2
- + movq mm1, mm2
- + lea r4, [r2+r3*2] ; r4 = src + 5*stride
- + psrlq mm4, 16
- + psrlq mm1, 8
- + PRED4x4_LOWPASS mm6, mm4, mm2, mm1, mm5 ; mm6 = 3-tap filter over the top edge
- + movq mm7, mm3
- + punpcklbw mm3, mm0 ; interleave averages with 3-tap values, low half
- + punpckhbw mm7, mm0 ; ... and high half
- + movq mm1, mm7
- + movq mm0, mm7
- + movq mm4, mm7
- + movq [r4+r3*2], mm3 ; row 7
- + PALIGNR mm7, mm3, 2, mm5 ; every higher row advances by two bytes
- + movq [r4+r3*1], mm7 ; row 6
- + PALIGNR mm1, mm3, 4, mm5
- + movq [r2+r3*2], mm1 ; row 5
- + PALIGNR mm0, mm3, 6, mm3
- + movq [r2+r3*1], mm0 ; row 4
- + movq mm2, mm6
- + movq mm3, mm6
- + movq [r1+r3*2], mm4 ; row 3 = high half of the interleaved data
- + PALIGNR mm6, mm4, 2, mm5 ; rows 2..0 continue into the filtered top edge
- + movq [r1+r3*1], mm6 ; row 2
- + PALIGNR mm2, mm4, 4, mm5
- + movq [r0+r3*2], mm2 ; row 1
- + PALIGNR mm3, mm4, 6, mm4
- + movq [r0+r3*1], mm3 ; row 0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ;void pred8x8l_horizontal_down_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 6c6cf30..cafe7a2 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -70,6 +70,7 @@ void ff_pred8x8l_down_right_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_vertical_right_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_down_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_down_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- @@ -113,6 +114,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext;
- + h->pred8x8l[HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_mmxext;
- h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext;
- h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- @@ -150,7 +152,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- //h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_sse2;
- //h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- h->pred8x8l[VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2;
- - h->pred8x8l[HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2;
- + //h->pred8x8l[HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- --
- 1.7.2.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement