Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 7d462192dfb66c6b2b3bdbaa841a6a69e5b7848e Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 14:32:11 -0500
- Subject: [PATCH 1/8] pred8x8l_top_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 78 ++++++++++++++++++++++++++++-----
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 68 insertions(+), 12 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 14a6038..b21516b 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -20,6 +20,7 @@
- ;******************************************************************************
- %include "x86inc.asm"
- +%include "x86util.asm"
- SECTION_RODATA
- @@ -37,6 +38,7 @@ SECTION .text
- cextern pb_1
- cextern pb_3
- +cextern pw_4
- cextern pw_5
- cextern pw_16
- cextern pw_17
- @@ -827,6 +829,70 @@ PRED8x8_H mmx
- PRED8x8_H mmxext
- PRED8x8_H ssse3
- +; dest, left, right, src, tmp (left, right and tmp are overwritten)
- +; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- +%macro PRED4x4_LOWPASS 5
- + mova %5, %2 ; tmp = left
- + pavgb %2, %3 ; left = (left + right + 1) >> 1, per byte
- + pxor %3, %5 ; right = left ^ right
- + mova %1, %4 ; dest = src
- + pand %3, [pb_1] ; presumably keeps bit 0 of each byte (pb_1 is external; assumed to be all 0x01 bytes - confirm)
- + psubusb %2, %3 ; under that assumption: left = (left + right) >> 1, i.e. the round-up of pavgb is undone
- + pavgb %1, %2 ; dest = (src + left + 1) >> 1, which yields the formula in the header
- +%endmacro
- +
- +;-----------------------------------------------------------------------------
- +; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_top_dc_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride (row above the block); the routine fills 8 rows with the rounded mean of the filtered top row
- + movq mm0, [r0-8] ; 8 bytes ending just left of that row
- + movq mm3, [r0] ; the 8 bytes directly above the block
- + movq mm1, [r0+8] ; the following 8 bytes to the right
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = top row preceded by the top-left byte (PALIGNR_MMX is defined elsewhere - confirm)
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = top row followed by the first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = low-pass filtered top row (left = mm2, right = mm1, centre = mm3)
- + jmp .body
- +.fix_lt_2: ; has_topleft == 0
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; mm5 = lowest byte of (mm3 ^ mm2)
- + pxor mm2, mm5 ; lowest byte of mm2 becomes the lowest byte of mm3
- + test r2, r2 ; top_right
- + jnz .do_top
- +.fix_tr_1: ; has_topright == 0
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; mm5 = highest byte of (mm3 ^ mm1)
- + pxor mm1, mm5 ; highest byte of mm1 becomes the highest byte of mm3
- + jmp .do_top
- +.body:
- + pxor mm1, mm1
- + psadbw mm1, mm0 ; low word = sum of the 8 filtered bytes
- + paddw mm1, [pw_4] ; rounding term (pw_4 is external; presumably words of 4)
- + psrlw mm1, 3 ; divide by 8
- + pshufw mm1, mm1, 0 ; broadcast the low word to all four words
- + packuswb mm1, mm1 ; 8 identical DC bytes
- +%rep 3
- + movq [r0+r3*1], mm1 ; two rows per repetition; 3 repetitions + 2 final stores = 8 rows
- + movq [r0+r3*2], mm1
- + lea r0, [r0+r3*2]
- +%endrep
- + movq [r0+r3*1], mm1
- + movq [r0+r3*2], mm1
- + RET
- +
- ;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- @@ -1073,18 +1139,6 @@ cglobal pred4x4_tm_vp8_ssse3, 3,3
- movd [r1+r2*2], mm5
- RET
- -; dest, left, right, src, tmp
- -; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
- -%macro PRED4x4_LOWPASS 5
- - mova %5, %2
- - pavgb %2, %3
- - pxor %3, %5
- - mova %1, %4
- - pand %3, [pb_1]
- - psubusb %2, %3
- - pavgb %1, %2
- -%endmacro
- -
- ;-----------------------------------------------------------------------------
- ; void pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 10a6dd6..aba02ce 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -57,6 +57,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -94,6 +95,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- + h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 30e18a52fb0409fd02de4f25e26285084fbb304f Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 14:49:27 -0500
- Subject: [PATCH 2/8] pred8x8l_vertical_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 46 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 48 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index b21516b..62a16ff 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -842,6 +842,52 @@ PRED8x8_H ssse3
- %endmacro
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_vertical_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_vertical_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride (row above the block); the routine copies the filtered top row into all 8 rows
- + movq mm0, [r0-8] ; 8 bytes ending just left of that row
- + movq mm3, [r0] ; the 8 bytes directly above the block
- + movq mm1, [r0+8] ; the following 8 bytes to the right
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = top row preceded by the top-left byte (PALIGNR_MMX is defined elsewhere - confirm)
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = top row followed by the first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5 ; mm0 = low-pass filtered top row (left = mm2, right = mm1, centre = mm3)
- + jmp .body
- +.fix_lt_2: ; has_topleft == 0
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; mm5 = lowest byte of (mm3 ^ mm2)
- + pxor mm2, mm5 ; lowest byte of mm2 becomes the lowest byte of mm3
- + test r2, r2 ; top_right
- + jnz .do_top
- +.fix_tr_1: ; has_topright == 0
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; mm5 = highest byte of (mm3 ^ mm1)
- + pxor mm1, mm5 ; highest byte of mm1 becomes the highest byte of mm3
- + jmp .do_top
- +.body:
- +%rep 3
- + movq [r0+r3*1], mm0 ; two rows per repetition; 3 repetitions + 2 final stores = 8 rows
- + movq [r0+r3*2], mm0
- + lea r0, [r0+r3*2]
- +%endrep
- + movq [r0+r3*1], mm0
- + movq [r0+r3*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- INIT_MMX
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index aba02ce..79bdaec 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -58,6 +58,7 @@ void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -96,6 +97,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- + h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 5fda00845e1c491ea8706782885737cc0ad15d7e Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 16:21:59 -0500
- Subject: [PATCH 3/8] pred8x8_top_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 28 ++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 3 +++
- 2 files changed, 31 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 62a16ff..3c39b71 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -940,6 +940,34 @@ cglobal pred8x8l_top_dc_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
- +;-----------------------------------------------------------------------------
- +cglobal pred8x8_top_dc_mmxext, 2,2
- + sub r0, r1 ; r0 = src - stride, the row above the block
- + movq mm0, [r0] ; the 8 bytes above the block
- + pxor mm1, mm1
- + pxor mm2, mm2
- + punpckhbw mm1, mm0 ; upper 4 top bytes, each paired with a zero byte
- + punpcklbw mm0, mm2 ; lower 4 top bytes, each paired with a zero byte
- + psadbw mm1, mm2 ; s1 = sum of the upper 4 top bytes
- + psadbw mm0, mm2 ; s0 = sum of the lower 4 top bytes
- + psrlw mm1, 1
- + psrlw mm0, 1
- + pavgw mm1, mm2 ; average with zero after the shift: (s1 + 2) >> 2
- + pavgw mm0, mm2 ; average with zero after the shift: (s0 + 2) >> 2
- + pshufw mm1, mm1, 0 ; dc1 (w), broadcast to all four words
- + pshufw mm0, mm0, 0 ; dc0 (w)
- + packuswb mm0, mm1 ; dc0,dc1 (b): low 4 bytes = dc0, high 4 bytes = dc1
- +%rep 3
- + movq [r0+r1*1], mm0 ; two rows per repetition; 3 repetitions + 2 final stores = 8 rows
- + movq [r0+r1*2], mm0
- + lea r0, [r0+r1*2]
- +%endrep
- + movq [r0+r1*1], mm0
- + movq [r0+r1*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 79bdaec..dfa3b3f 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -49,6 +49,7 @@ void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_horizontal_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8_top_dc_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_plane_mmx (uint8_t *src, int stride);
- void ff_pred8x8_plane_mmx2 (uint8_t *src, int stride);
- void ff_pred8x8_plane_sse2 (uint8_t *src, int stride);
- @@ -101,6 +102,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- + if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264)
- + h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
- h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
- --
- 1.7.2.2
- From 8d6b2365fa48e6dab2957e81702297d56ee67429 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 17:42:43 -0500
- Subject: [PATCH 4/8] pred8x8l_horizontal_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 75 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 77 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 3c39b71..5c5af03 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -968,6 +968,81 @@ cglobal pred8x8_top_dc_mmxext, 2,2
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_horizontal_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_horizontal_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride; the routine fills each row with its filtered left-neighbour byte
- + lea r2, [r0+r3*2] ; r2 = src + stride (the has_topright argument is overwritten and never tested here)
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending just left of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; interleaved with the 8 bytes ending just left of row -1
- + movq mm1, [r2+r3*1-8] ; same for row 2 ...
- + punpckhbw mm1, [r0+r3*2-8] ; ... interleaved with row 1
- + mov r2, r0 ; r2 = src - stride, kept for later
- + punpckhwd mm1, mm0 ; combine the top words of both pairs
- + lea r0, [r0+r3*4] ; r0 = src + 3*stride
- + movq mm2, [r0+r3*1-8] ; row 4 ...
- + punpckhbw mm2, [r0+r3*0-8] ; ... interleaved with row 3
- + lea r0, [r0+r3*2] ; r0 = src + 5*stride
- + movq mm3, [r0+r3*1-8] ; row 6 ...
- + punpckhbw mm3, [r0+r3*0-8] ; ... interleaved with row 5
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = left bytes of rows 6,5,4,3,2,1,0,-1 (lowest to highest byte)
- + lea r0, [r0+r3*2] ; r0 = src + 7*stride
- + movq mm0, [r0+r3*0-8] ; 8 bytes ending just left of row 7
- + movq mm1, [r2] ; the 8 bytes above the block
- + mov r0, r2 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; presumably shifts the row-7 byte into the column from below (PALIGNR_MMX is defined elsewhere - confirm)
- + PALIGNR mm1, mm2, 1, mm2 ; presumably shifts the first top byte into the column from above
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; save mm4; the macro below overwrites it
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = low-pass with left = mm1, right = mm4, centre = mm3
- + movq mm4, mm0
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass: left = mm3, right = mm0 (copy of mm4), centre = mm4
- + psllq mm1, 56 ; keep only its lowest byte, moved to the top
- + PALIGNR mm7, mm1, 7, mm3 ; presumably appends that byte below the first result
- + movq mm3, mm7 ; mm3 = filtered left column consumed by .body
- + jmp .body
- +.fix_lt_1:
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56
- + psllq mm5, 48
- + pxor mm1, mm5
- + jmp .do_left
- +.body
- + movq mm7, mm3
- + punpckhbw mm3, mm3
- + punpcklbw mm7, mm7
- + pshufw mm0, mm3, 0xff
- + pshufw mm1, mm3, 0xaa
- + pshufw mm2, mm3, 0x55
- + pshufw mm3, mm3, 0x00
- + pshufw mm4, mm7, 0xff
- + pshufw mm5, mm7, 0xaa
- + pshufw mm6, mm7, 0x55
- + pshufw mm7, mm7, 0x00
- + lea r1, [r0+r3*2]
- + lea r2, [r1+r3*2]
- + movq [r0+r3*1], mm0
- + movq [r0+r3*2], mm1
- + movq [r1+r3*1], mm2
- + movq [r1+r3*2], mm3
- + movq [r2+r3*1], mm4
- + movq [r2+r3*2], mm5
- + lea r0, [r2+r3*2]
- + movq [r0+r3*1], mm6
- + movq [r0+r3*2], mm7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index dfa3b3f..98a906f 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -60,6 +60,7 @@ void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -99,6 +100,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- + h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 1713c97a216d9127e6e29d1edddc62dacde6608b Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 19:45:23 -0500
- Subject: [PATCH 5/8] pred8x8l_dc_mmxext
- ---
- libavcodec/x86/h264_intrapred.asm | 98 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 2 files changed, 100 insertions(+), 0 deletions(-)
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 5c5af03..dfe381c 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -40,6 +40,7 @@ cextern pb_1
- cextern pb_3
- cextern pw_4
- cextern pw_5
- +cextern pw_8
- cextern pw_16
- cextern pw_17
- cextern pw_32
- @@ -888,6 +889,103 @@ cglobal pred8x8l_vertical_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_dc_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_dc_mmxext, 4,5
- + sub r0, r3 ; r0 = src - stride; the routine fills 8 rows with the rounded mean of the filtered left column and top row
- + lea r4, [r0+r3*2] ; r4 = src + stride
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending just left of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; interleaved with the 8 bytes ending just left of row -1
- + movq mm1, [r4+r3*1-8] ; same for row 2 ...
- + punpckhbw mm1, [r0+r3*2-8] ; ... interleaved with row 1
- + mov r4, r0 ; r4 = src - stride, kept for later
- + punpckhwd mm1, mm0 ; combine the top words of both pairs
- + lea r0, [r0+r3*4] ; r0 = src + 3*stride
- + movq mm2, [r0+r3*1-8] ; row 4 ...
- + punpckhbw mm2, [r0+r3*0-8] ; ... interleaved with row 3
- + lea r0, [r0+r3*2] ; r0 = src + 5*stride
- + movq mm3, [r0+r3*1-8] ; row 6 ...
- + punpckhbw mm3, [r0+r3*0-8] ; ... interleaved with row 5
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = left bytes of rows 6,5,4,3,2,1,0,-1 (lowest to highest byte)
- + lea r0, [r0+r3*2] ; r0 = src + 7*stride
- + movq mm0, [r0+r3*0-8] ; 8 bytes ending just left of row 7
- + movq mm1, [r4] ; the 8 bytes above the block
- + mov r0, r4 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; presumably shifts the row-7 byte into the column from below (PALIGNR_MMX is defined elsewhere - confirm)
- + PALIGNR mm1, mm2, 1, mm2 ; presumably shifts the first top byte into the column from above
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; save mm4; the macro below overwrites it
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = low-pass with left = mm1, right = mm4, centre = mm3
- + movq mm4, mm0
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass: left = mm3, right = mm0 (copy of mm4), centre = mm4
- + psllq mm1, 56 ; keep only its lowest byte, moved to the top
- + PALIGNR mm7, mm1, 7, mm3 ; presumably appends that byte below the first result: mm7 = filtered left column
- +.check_top:
- + movq mm0, [r0-8] ; 8 bytes ending just left of the top row
- + movq mm3, [r0] ; the 8 bytes directly above the block
- + movq mm1, [r0+8] ; the following 8 bytes to the right
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = top row preceded by the top-left byte
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = top row followed by the first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = low-pass filtered top row
- + jmp .body
- +.fix_lt_1: ; has_topleft == 0, left-column pass
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56 ; highest byte of (mm3 ^ mm4) ...
- + psllq mm5, 48 ; ... moved to byte 6
- + pxor mm1, mm5 ; byte 6 of mm1 is toggled by that difference
- + jmp .do_left
- +.fix_lt_2: ; has_topleft == 0, top-row pass
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; mm5 = lowest byte of (mm3 ^ mm2)
- + pxor mm2, mm5 ; lowest byte of mm2 becomes the lowest byte of mm3
- + test r2, r2 ; top_right
- + jnz .do_top
- +.fix_tr_1: ; has_topright == 0
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; mm5 = highest byte of (mm3 ^ mm1)
- + pxor mm1, mm5 ; highest byte of mm1 becomes the highest byte of mm3
- + jmp .do_top
- +.body:
- + pxor mm0, mm0
- + pxor mm1, mm1
- + psadbw mm0, mm7 ; sum of the 8 filtered left bytes
- + psadbw mm1, mm6 ; sum of the 8 filtered top bytes
- + paddw mm0, [pw_8] ; rounding term (pw_8 is external; presumably words of 8)
- + paddw mm0, mm1
- + psrlw mm0, 4 ; divide by 16
- + pshufw mm0, mm0, 0 ; broadcast the low word to all four words
- + packuswb mm0, mm0 ; 8 identical DC bytes
- +%rep 3
- + movq [r0+r3*1], mm0 ; two rows per repetition; 3 repetitions + 2 final stores = 8 rows
- + movq [r0+r3*2], mm0
- + lea r0, [r0+r3*2]
- +%endrep
- + movq [r0+r3*1], mm0
- + movq [r0+r3*2], mm0
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- INIT_MMX
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 98a906f..b5e6c02 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -58,6 +58,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
- void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
- +void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- @@ -98,6 +99,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
- + h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_mmxext;
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- --
- 1.7.2.2
- From 2334210d134a7e62838f011484738f6548db361d Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 21:06:06 -0500
- Subject: [PATCH 6/8] pred8x8l_horizontal_up_mmxext
- ---
- libavcodec/h264.c | 2 +
- libavcodec/x86/h264_intrapred.asm | 96 +++++++++++++++++++++++++++++++++-
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 98 insertions(+), 2 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index 318c1c8..cbcbc42 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1190,8 +1190,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
- }else{
- const int nnz = h->non_zero_count_cache[ scan8[i] ];
- +START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- +if (dir == HOR_UP_PRED) { STOP_TIMER("pred8x8l_horizontal_up"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index dfe381c..901823c 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1116,19 +1116,19 @@ cglobal pred8x8l_horizontal_mmxext, 4,4
- pxor mm1, mm5
- jmp .do_left
- .body
- + lea r1, [r0+r3*2]
- movq mm7, mm3
- punpckhbw mm3, mm3
- punpcklbw mm7, mm7
- pshufw mm0, mm3, 0xff
- pshufw mm1, mm3, 0xaa
- + lea r2, [r1+r3*2]
- pshufw mm2, mm3, 0x55
- pshufw mm3, mm3, 0x00
- pshufw mm4, mm7, 0xff
- pshufw mm5, mm7, 0xaa
- pshufw mm6, mm7, 0x55
- pshufw mm7, mm7, 0x00
- - lea r1, [r0+r3*2]
- - lea r2, [r1+r3*2]
- movq [r0+r3*1], mm0
- movq [r0+r3*2], mm1
- movq [r1+r3*1], mm2
- @@ -1141,6 +1141,98 @@ cglobal pred8x8l_horizontal_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_horizontal_up_mmxext, 4,4
- + sub r0, r3 ; r0 = src - stride; the prediction is built from the filtered left column only
- + lea r2, [r0+r3*2] ; r2 = src + stride (the has_topright argument is overwritten and never tested here)
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending just left of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; interleaved with the 8 bytes ending just left of row -1
- + movq mm1, [r2+r3*1-8] ; same for row 2 ...
- + punpckhbw mm1, [r0+r3*2-8] ; ... interleaved with row 1
- + mov r2, r0 ; r2 = src - stride, kept for later
- + punpckhwd mm1, mm0 ; combine the top words of both pairs
- + lea r0, [r0+r3*4] ; r0 = src + 3*stride
- + movq mm2, [r0+r3*1-8] ; row 4 ...
- + punpckhbw mm2, [r0+r3*0-8] ; ... interleaved with row 3
- + lea r0, [r0+r3*2] ; r0 = src + 5*stride
- + movq mm3, [r0+r3*1-8] ; row 6 ...
- + punpckhbw mm3, [r0+r3*0-8] ; ... interleaved with row 5
- + punpckhwd mm3, mm2
- + punpckhdq mm3, mm1 ; mm3 = left bytes of rows 6,5,4,3,2,1,0,-1 (lowest to highest byte)
- + lea r0, [r0+r3*2] ; r0 = src + 7*stride
- + movq mm0, [r0+r3*0-8] ; 8 bytes ending just left of row 7
- + movq mm1, [r2] ; the 8 bytes above the block
- + mov r0, r2 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; presumably shifts the row-7 byte into the column from below (PALIGNR_MMX is defined elsewhere - confirm)
- + PALIGNR mm1, mm2, 1, mm2 ; presumably shifts the first top byte into the column from above
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; save mm4; the macro below overwrites it
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = low-pass with left = mm1, right = mm4, centre = mm3
- + movq mm4, mm0
- + movq mm7, mm2
- + PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5 ; second pass: left = mm3, right = mm0 (copy of mm4), centre = mm4
- + psllq mm1, 56 ; keep only its lowest byte, moved to the top
- + PALIGNR mm7, mm1, 7, mm3 ; presumably appends that byte below the first result: mm7 = filtered left column
- + movq mm1, mm7 ; redundant: .body copies mm7 into mm1 again before using it
- + jmp .body
- +.fix_lt_1: ; has_topleft == 0
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56 ; highest byte of (mm3 ^ mm4) ...
- + psllq mm5, 48 ; ... moved to byte 6
- + pxor mm1, mm5 ; byte 6 of mm1 is toggled by that difference
- + jmp .do_left
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride
- + movq mm1, mm7 ; l0 l1 l2 l3 l4 l5 l6 l7
- + pshufw mm0, mm1, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1
- + psllq mm1, 56 ; l7 .. .. .. .. .. .. ..
- + movq mm2, mm0
- + psllw mm0, 8
- + psrlw mm2, 8
- + por mm2, mm0 ; l7 l6 l5 l4 l3 l2 l1 l0
- + movq mm3, mm2
- + movq mm4, mm2
- + movq mm5, mm2
- + psrlq mm2, 8
- + psrlq mm3, 16
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + por mm2, mm1 ; l7 l7 l6 l5 l4 l3 l2 l1
- + punpckhbw mm1, mm1
- + por mm3, mm1 ; l7 l7 l7 l6 l5 l4 l3 l2
- + pavgb mm4, mm2 ; rounded two-tap averages of adjacent left pixels
- + PRED4x4_LOWPASS mm1, mm3, mm5, mm2, mm6 ; three-tap filtered values (left = mm3, right = mm5, centre = mm2)
- + movq mm5, mm4
- + punpcklbw mm4, mm1 ; p4 p3 p2 p1
- + punpckhbw mm5, mm1 ; p8 p7 p6 p5
- + movq mm6, mm5
- + movq mm7, mm5
- + movq mm0, mm5
- + PALIGNR mm5, mm4, 2, mm1 ; rows 1..3 are presumably the p8..p1 sequence advanced by 2, 4 and 6 bytes
- + pshufw mm1, mm6, 11111001b
- + PALIGNR mm6, mm4, 4, mm2
- + pshufw mm2, mm7, 11111110b
- + PALIGNR mm7, mm4, 6, mm3
- + pshufw mm3, mm0, 11111111b
- + movq [r0+r3*1], mm4 ; row 0
- + movq [r0+r3*2], mm5 ; row 1
- + lea r0, [r2+r3*2] ; r0 = src + 5*stride
- + movq [r1+r3*1], mm6 ; row 2
- + movq [r1+r3*2], mm7 ; row 3
- + movq [r2+r3*1], mm0 ; row 4
- + movq [r2+r3*2], mm1 ; row 5
- + movq [r0+r3*1], mm2 ; row 6
- + movq [r0+r3*2], mm3 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index b5e6c02..4877919 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -62,6 +62,7 @@ void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -103,6 +104,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
- h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_mmxext;
- h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_mmxext;
- + h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext;
- h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
- if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264)
- h->pred4x4 [DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_mmxext;
- --
- 1.7.2.2
- From 3452703f64c49b467c6b6664b1b8ebe9d7a854cc Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sat, 25 Dec 2010 23:41:50 -0500
- Subject: [PATCH 7/8] pred8x8l_down_left_mmxext
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 91 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 94 insertions(+), 1 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index cbcbc42..f0d314e 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == HOR_UP_PRED) { STOP_TIMER("pred8x8l_horizontal_up"); }
- +if (dir == DIAG_DOWN_LEFT_PRED) { STOP_TIMER("pred8x8l_down_left"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 901823c..788cbc9 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -1233,6 +1233,97 @@ cglobal pred8x8l_horizontal_up_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +;void pred8x8l_down_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_down_left_sse2, 4,4
- + sub r0, r3 ; r0 = src - stride (row above the block); the prediction is built from the filtered top and top-right rows
- + movq mm0, [r0-8] ; 8 bytes ending just left of that row
- + movq mm3, [r0] ; the 8 bytes directly above the block
- + movq mm1, [r0+8] ; the following 8 bytes to the right
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; presumably mm2 = top row preceded by the top-left byte (PALIGNR_MMX is defined elsewhere - confirm)
- + PALIGNR mm1, mm4, 1, mm4 ; presumably mm1 = top row followed by the first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5 ; mm4 = low-pass filtered top row; mm3 (centre) is left intact
- + movq mm7, mm4 ; mm7 = filtered top row, consumed by .body
- + test r2, r2 ; top_right
- + jz .fix_tr_2
- + movq mm0, [r0+8] ; the 8 top-right bytes
- + movq mm5, mm0
- + movq mm2, mm0
- + movq mm4, mm0
- + psrlq mm5, 56 ; last top-right byte, moved to the lowest position
- + PALIGNR mm2, mm3, 7, mm3 ; presumably top-right row preceded by the last top byte
- + PALIGNR mm5, mm4, 1, mm4 ; presumably top-right row followed by a copy of its last byte
- + PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4 ; mm1 = low-pass filtered top-right row
- + jmp .do_topright
- +.fix_tr_2: ; has_topright == 0
- + punpckhbw mm3, mm3
- + pshufw mm1, mm3, 0xFF ; mm1 = highest byte of the unfiltered top row, replicated 8 times
- +.do_topright:
- + movq mm6, mm1 ; mm6 = top-right row, consumed by .body
- + psrlq mm1, 56
- + movq mm5, mm1 ; mm5 = its last byte, in the lowest position
- + jmp .body
- +.fix_lt_2: ; has_topleft == 0
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; mm5 = lowest byte of (mm3 ^ mm2)
- + pxor mm2, mm5 ; lowest byte of mm2 becomes the lowest byte of mm3
- + test r2, r2 ; top_right
- + jnz .do_top
- +.fix_tr_1: ; has_topright == 0
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; mm5 = highest byte of (mm3 ^ mm1)
- + pxor mm1, mm5 ; highest byte of mm1 becomes the highest byte of mm3
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride
- + movq2dq xmm3, mm7 ; low half = filtered top row
- + movq2dq xmm4, mm6
- + pslldq xmm4, 8
- + por xmm3, xmm4 ; xmm3 = top row (low 8 bytes) | top-right row (high 8 bytes)
- + movdqa xmm2, xmm3
- + psrldq xmm2, 1 ; the 16 samples moved down by one byte
- + movq2dq xmm5, mm5
- + pslldq xmm5, 15
- + por xmm2, xmm5 ; vacated highest byte filled with the last top-right byte
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + movdqa xmm1, xmm3
- + pslldq xmm1, 1 ; the 16 samples moved up by one byte
- +INIT_XMM
- + PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4 ; xmm0 = three-tap filter over the 16 samples
- + psrldq xmm0, 1 ; each row is the previous one advanced by one byte
- + movq [r0+r3*1], xmm0 ; row 0
- + psrldq xmm0, 1
- + movq [r0+r3*2], xmm0 ; row 1
- + psrldq xmm0, 1
- + lea r0, [r2+r3*2] ; r0 = src + 5*stride
- + movq [r1+r3*1], xmm0 ; row 2
- + psrldq xmm0, 1
- + movq [r1+r3*2], xmm0 ; row 3
- + psrldq xmm0, 1
- + movq [r2+r3*1], xmm0 ; row 4
- + psrldq xmm0, 1
- + movq [r2+r3*2], xmm0 ; row 5
- + psrldq xmm0, 1
- + movq [r0+r3*1], xmm0 ; row 6
- + psrldq xmm0, 1
- + movq [r0+r3*2], xmm0 ; row 7
- + RET
- +
- +;-----------------------------------------------------------------------------
- ; void pred8x8_dc_rv40(uint8_t *src, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 4877919..4bf9f78 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -63,6 +63,7 @@ void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -134,6 +135,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- + h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- --
- 1.7.2.2
- From 66b03d80b08ace9f0d99b12c79cc111a6950c166 Mon Sep 17 00:00:00 2001
- From: Daniel Kang <daniel.d.kang@gmail.com>
- Date: Sun, 26 Dec 2010 18:07:58 -0500
- Subject: [PATCH 8/8] pred8x8l_vertical_right_sse2
- ---
- libavcodec/h264.c | 2 +-
- libavcodec/x86/h264_intrapred.asm | 117 ++++++++++++++++++++++++++++++++++
- libavcodec/x86/h264_intrapred_init.c | 2 +
- 3 files changed, 120 insertions(+), 1 deletions(-)
- diff --git a/libavcodec/h264.c b/libavcodec/h264.c
- index f0d314e..8353b51 100644
- --- a/libavcodec/h264.c
- +++ b/libavcodec/h264.c
- @@ -1193,7 +1193,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
- START_TIMER;
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- -if (dir == DIAG_DOWN_LEFT_PRED) { STOP_TIMER("pred8x8l_down_left"); }
- +if (dir == VERT_RIGHT_PRED) { STOP_TIMER("pred8x8l_vertical_right"); }
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
- index 788cbc9..1327ecb 100644
- --- a/libavcodec/x86/h264_intrapred.asm
- +++ b/libavcodec/x86/h264_intrapred.asm
- @@ -25,6 +25,7 @@
- SECTION_RODATA
- tm_shuf: times 8 db 0x03, 0x80
- +pw_ff00: times 8 dw 0xff00
- plane_shuf: db -8, -7, -6, -5, -4, -3, -2, -1
- db 1, 2, 3, 4, 5, 6, 7, 8
- plane8_shuf: db -4, -3, -2, -1, 0, 0, 0, 0
- @@ -1233,6 +1234,122 @@ cglobal pred8x8l_horizontal_up_mmxext, 4,4
- RET
- ;-----------------------------------------------------------------------------
- +; void pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- +;-----------------------------------------------------------------------------
- +; In: r0 = src, r1 = has_topleft, r2 = has_topright, r3 = stride. Out: 8 rows of 8 bytes stored at src. Uses r4, mm0-mm7, xmm0-xmm6.
- +INIT_MMX
- +%define PALIGNR PALIGNR_MMX
- +cglobal pred8x8l_vertical_right_sse2, 4,5,7
- + sub r0, r3 ; r0 = src - stride (row above the block)
- + lea r4, [r0+r3*2] ; r4 = src + stride
- + movq mm0, [r0+r3*1-8] ; 8 bytes ending at the left neighbour of row 0
- + punpckhbw mm0, [r0+r3*0-8] ; top two bytes of mm0 = left[0], top-left
- + movq mm1, [r4+r3*1-8] ; left neighbour of row 2 in the top byte
- + punpckhbw mm1, [r0+r3*2-8] ; top two bytes of mm1 = left[2], left[1]
- + mov r4, r0 ; keep src - stride for the top-row loads
- + punpckhwd mm1, mm0 ; high dword of mm1 = left[2], left[1], left[0], top-left
- + lea r0, [r0+r3*4] ; r0 = src + 3*stride
- + movq mm2, [r0+r3*1-8]
- + punpckhbw mm2, [r0+r3*0-8] ; top two bytes of mm2 = left[4], left[3]
- + lea r0, [r0+r3*2] ; r0 = src + 5*stride
- + movq mm3, [r0+r3*1-8]
- + punpckhbw mm3, [r0+r3*0-8] ; top two bytes of mm3 = left[6], left[5]
- + punpckhwd mm3, mm2 ; high dword of mm3 = left[6], left[5], left[4], left[3]
- + punpckhdq mm3, mm1 ; mm3 bytes 0..7 = left[6] .. left[0], top-left
- + lea r0, [r0+r3*2] ; r0 = src + 7*stride
- + movq mm0, [r0+r3*0-8] ; left[7] in the top byte
- + movq mm1, [r4] ; 8 bytes of the row above the block
- + mov r0, r4 ; r0 = src - stride again
- + movq mm4, mm3
- + movq mm2, mm3
- + PALIGNR mm4, mm0, 7, mm0 ; mm4 = mm3 shifted up one byte, left[7] inserted at byte 0
- + PALIGNR mm1, mm2, 1, mm2 ; mm1 = mm3 shifted down one byte, top[0] inserted at byte 7
- + test r1, r1 ; top_left
- + jz .fix_lt_1
- +.do_left:
- + movq mm0, mm4 ; (mm0 is reloaded below before it is read again)
- + PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5 ; mm2 = (mm1 + 2*mm3 + mm4 + 2) >> 2 per byte
- + movq mm7, mm2 ; mm7 = filtered left edge, consumed in .body
- + movq mm0, [r0-8] ; top-left sample in the top byte
- + movq mm3, [r0] ; top[0..7]
- + movq mm1, [r0+8] ; 8 bytes to the right of the top row
- + movq mm2, mm3
- + movq mm4, mm3
- + PALIGNR mm2, mm0, 7, mm0 ; mm2 = top-left, top[0..6]
- + PALIGNR mm1, mm4, 1, mm4 ; mm1 = top[1..7], first top-right byte
- + test r1, r1 ; top_left
- + jz .fix_lt_2
- + test r2, r2 ; top_right
- + jz .fix_tr_1
- +.do_top:
- + PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5 ; mm6 = filtered top edge
- + jmp .body
- +.fix_lt_1: ; no top-left: substitute left[0] for it in the left-edge filter input
- + movq mm5, mm3
- + pxor mm5, mm4
- + psrlq mm5, 56 ; isolate (top-left ^ left[0]) from the top byte
- + psllq mm5, 48 ; move it to byte 6, where mm1 holds top-left
- + pxor mm1, mm5 ; mm1 byte 6 becomes left[0]
- + jmp .do_left
- +.fix_lt_2: ; no top-left: substitute top[0] for it in the top-edge filter input
- + movq mm5, mm3
- + pxor mm5, mm2
- + psllq mm5, 56
- + psrlq mm5, 56 ; keep only byte 0 = (top[0] ^ top-left)
- + pxor mm2, mm5 ; mm2 byte 0 becomes top[0]
- + test r2, r2 ; top_right
- + jnz .do_top ; otherwise fall through and patch the top-right byte too
- +.fix_tr_1: ; no top-right: substitute top[7] for the first top-right byte
- + movq mm5, mm3
- + pxor mm5, mm1
- + psrlq mm5, 56
- + psllq mm5, 56 ; keep only byte 7 = (top[7] ^ top-right[0])
- + pxor mm1, mm5 ; mm1 byte 7 becomes top[7]
- + jmp .do_top
- +.body:
- + lea r1, [r0+r3*2] ; r1 = src + stride (flags in r1/r2 are no longer needed)
- + movq2dq xmm0, mm7 ; low 8 bytes = filtered left edge
- + movq2dq xmm4, mm6
- + pslldq xmm4, 8 ; filtered top edge moved to the high 8 bytes
- + por xmm0, xmm4 ; xmm0 = left edge (low half) : top edge (high half)
- + movdqa xmm6, [pw_ff00] ; NOTE(review): xmm6 is callee-saved on Win64; confirm cglobal spills it when invoked under INIT_MMX
- + movdqa xmm1, xmm0
- + lea r2, [r1+r3*2] ; r2 = src + 3*stride
- + movdqa xmm2, xmm0
- + movdqa xmm3, xmm0
- + pslldq xmm0, 1 ; edge shifted by one byte
- + pslldq xmm1, 2 ; edge shifted by two bytes
- + pavgb xmm2, xmm0 ; xmm2 = rounded average of each edge byte and its neighbour
- +INIT_XMM
- + PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5 ; xmm4 = (xmm3 + 2*xmm0 + xmm1 + 2) >> 2 per byte
- + pandn xmm6, xmm4 ; xmm6 = low byte of every word of the 3-tap result
- + movdqa xmm5, xmm4
- + psrlw xmm4, 8 ; xmm4 = high byte of every word of the 3-tap result
- + packuswb xmm6, xmm4 ; xmm6 = even-indexed bytes (low half) : odd-indexed bytes (high half)
- + movhlps xmm4, xmm6 ; xmm4 low half = odd-indexed bytes
- + movhps [r0+r3*2], xmm5 ; row 1 = high half of the 3-tap result
- + movhps [r0+r3*1], xmm2 ; row 0 = high half of the 2-tap averages
- + psrldq xmm5, 4
- + movss xmm5, xmm6 ; low 4 bytes replaced by the first 4 even-indexed bytes
- + psrldq xmm2, 4
- + movss xmm2, xmm4 ; low 4 bytes replaced by the first 4 odd-indexed bytes
- + lea r0, [r2+r3*2] ; r0 = src + 5*stride
- +
- + psrldq xmm5, 1
- + psrldq xmm2, 1
- + movq [r0+r3*2], xmm5 ; row 7
- + movq [r0+r3*1], xmm2 ; row 6
- + psrldq xmm5, 1
- + psrldq xmm2, 1
- + movq [r2+r3*2], xmm5 ; row 5
- + movq [r2+r3*1], xmm2 ; row 4
- + psrldq xmm5, 1
- + psrldq xmm2, 1
- + movq [r1+r3*2], xmm5 ; row 3
- + movq [r1+r3*1], xmm2 ; row 2
- + RET
- +
- +;-----------------------------------------------------------------------------
- ;void pred8x8l_down_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride)
- ;-----------------------------------------------------------------------------
- diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
- index 4bf9f78..1c5dcc5 100644
- --- a/libavcodec/x86/h264_intrapred_init.c
- +++ b/libavcodec/x86/h264_intrapred_init.c
- @@ -64,6 +64,7 @@ void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topri
- void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride);
- +void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride);
- void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride);
- void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride);
- @@ -136,6 +137,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2;
- + h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2;
- if (codec_id == CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_sse2;
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_sse2;
- --
- 1.7.2.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement